diff --git a/Source/Editor/Cooker/Platform/Web/WebPlatformTools.cpp b/Source/Editor/Cooker/Platform/Web/WebPlatformTools.cpp index ab4d075b9..df01500ce 100644 --- a/Source/Editor/Cooker/Platform/Web/WebPlatformTools.cpp +++ b/Source/Editor/Cooker/Platform/Web/WebPlatformTools.cpp @@ -8,11 +8,13 @@ #include "Engine/Platform/CreateProcessSettings.h" #include "Engine/Platform/Web/WebPlatformSettings.h" #include "Engine/Core/Types/Span.h" +#include "Engine/Core/Math/Vector2.h" #include "Engine/Core/Config/GameSettings.h" #include "Engine/Core/Config/BuildSettings.h" #include "Engine/Content/Content.h" #include "Engine/Content/JsonAsset.h" #include "Engine/Graphics/PixelFormatExtensions.h" +#include "Engine/Graphics/Textures/TextureBase.h" #include "Editor/Cooker/GameCooker.h" IMPLEMENT_SETTINGS_GETTER(WebPlatformSettings, WebPlatform); @@ -21,7 +23,7 @@ namespace { struct WebPlatformCache { - WebPlatformSettings::TextureQuality TexturesQuality; + WebPlatformSettings::TextureCompression TexturesCompression; }; } @@ -53,13 +55,14 @@ DotNetAOTModes WebPlatformTools::UseAOT() const PixelFormat WebPlatformTools::GetTextureFormat(CookingData& data, TextureBase* texture, PixelFormat format) { const auto platformSettings = WebPlatformSettings::Get(); - switch (platformSettings->TexturesQuality) + const auto uncompressed = PixelFormatExtensions::FindUncompressedFormat(format); + switch (platformSettings->TexturesCompression) { - case WebPlatformSettings::TextureQuality::Uncompressed: - return PixelFormatExtensions::FindUncompressedFormat(format); - case WebPlatformSettings::TextureQuality::BC: + case WebPlatformSettings::TextureCompression::Uncompressed: + return uncompressed; + case WebPlatformSettings::TextureCompression::BC: return format; - case WebPlatformSettings::TextureQuality::ASTC: + case WebPlatformSettings::TextureCompression::ASTC: switch (format) { case PixelFormat::BC4_SNorm: @@ -71,10 +74,23 @@ PixelFormat WebPlatformTools::GetTextureFormat(CookingData& data, 
TextureBase* t case PixelFormat::BC6H_Sf16: case PixelFormat::BC7_Typeless: case PixelFormat::BC7_UNorm: + case PixelFormat::BC7_UNorm_sRGB: return PixelFormat::R16G16B16A16_Float; // TODO: ASTC HDR - default: + default: return PixelFormatExtensions::IsSRGB(format) ? PixelFormat::ASTC_6x6_UNorm_sRGB : PixelFormat::ASTC_6x6_UNorm; } + case WebPlatformSettings::TextureCompression::Basis: + switch (format) + { + case PixelFormat::BC7_Typeless: + case PixelFormat::BC7_UNorm: + case PixelFormat::BC7_UNorm_sRGB: + return PixelFormat::R16G16B16A16_Float; // Basic Universal doesn't support alpha in BC7 (and it can be loaded only from LDR formats) + default: + if (uncompressed != format && texture->Size().MinValue() >= 16) + return PixelFormat::Basis; + return uncompressed; + } default: return format; } @@ -87,7 +103,7 @@ void WebPlatformTools::LoadCache(CookingData& data, IBuildCache* cache, const Sp if (bytes.Length() == sizeof(WebPlatformCache)) { auto* platformCache = (WebPlatformCache*)bytes.Get(); - invalidTextures = platformCache->TexturesQuality != platformSettings->TexturesQuality; + invalidTextures = platformCache->TexturesCompression != platformSettings->TexturesCompression; } if (invalidTextures) { @@ -100,7 +116,7 @@ Array WebPlatformTools::SaveCache(CookingData& data, IBuildCache* cache) { const auto platformSettings = WebPlatformSettings::Get(); WebPlatformCache platformCache; - platformCache.TexturesQuality = platformSettings->TexturesQuality; + platformCache.TexturesCompression = platformSettings->TexturesCompression; Array result; result.Add((const byte*)&platformCache, sizeof(platformCache)); return result; diff --git a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp index 9a9d91e7f..d260496bc 100644 --- a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp +++ b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp @@ -743,6 +743,26 @@ bool ProcessTextureBase(CookAssetsStep::AssetCookData& data) auto chunk = New(); 
data.InitData.Header.Chunks[mipIndex] = chunk; + if (header.Format == PixelFormat::Basis) + { + // Store as-is + int32 maxDataSize = 0; + for (int32 arrayIndex = 0; arrayIndex < textureData->Items.Count(); arrayIndex++) + { + auto& mipData = textureData->Items[arrayIndex].Mips[mipIndex]; + maxDataSize = Math::Max(maxDataSize, mipData.Data.Length()); + } + chunk->Data.Allocate(maxDataSize * textureData->GetArraySize()); + for (int32 arrayIndex = 0; arrayIndex < textureData->Items.Count(); arrayIndex++) + { + auto& mipData = textureData->Items[arrayIndex].Mips[mipIndex]; + byte* dst = chunk->Data.Get() + maxDataSize * arrayIndex; + Platform::MemoryCopy(dst, mipData.Data.Get(), mipData.Data.Length()); + Platform::MemoryClear(dst + mipData.Data.Length(), maxDataSize - mipData.Data.Length()); + } + continue; + } + // Calculate the texture data storage layout uint32 rowPitch, slicePitch; const int32 mipWidth = Math::Max(1, textureData->Width >> mipIndex); diff --git a/Source/Editor/Windows/Assets/TextureWindow.cs b/Source/Editor/Windows/Assets/TextureWindow.cs index 865099aea..3977d562a 100644 --- a/Source/Editor/Windows/Assets/TextureWindow.cs +++ b/Source/Editor/Windows/Assets/TextureWindow.cs @@ -63,7 +63,9 @@ namespace FlaxEditor.Windows.Assets // Texture info var general = layout.Group("General"); - general.Label("Format: " + texture.Format); + var textureFormat = texture.Format; + var gpuFormat = texture.Texture?.Format ?? textureFormat; + general.Label(textureFormat != gpuFormat && gpuFormat != PixelFormat.Unknown ? 
$"Format: {textureFormat} ({gpuFormat})" : $"Format: {textureFormat}"); general.Label(string.Format("Size: {0}x{1}", texture.Width, texture.Height)).AddCopyContextMenu(); general.Label("Mip levels: " + texture.MipLevels); general.Label("Memory usage: " + Utilities.Utils.FormatBytesCount(texture.TotalMemoryUsage)).AddCopyContextMenu(); diff --git a/Source/Engine/ContentImporters/ImportTexture.cpp b/Source/Engine/ContentImporters/ImportTexture.cpp index af1e4c917..866e38c44 100644 --- a/Source/Engine/ContentImporters/ImportTexture.cpp +++ b/Source/Engine/ContentImporters/ImportTexture.cpp @@ -159,6 +159,8 @@ CreateAssetResult ImportTexture::Create(CreateAssetContext& context, const Textu textureHeader.IsSRGB = PixelFormatExtensions::IsSRGB(textureHeader.Format); textureHeader.IsCubeMap = isCubeMap; textureHeader.TextureGroup = options.TextureGroup; + if (textureHeader.Format == PixelFormat::Basis) + textureHeader.IsSRGB = options.sRGB; // Basis format can be either sRGB or linear, so use option to decide that ASSERT(textureHeader.MipLevels <= GPU_MAX_TEXTURE_MIP_LEVELS); // Save header diff --git a/Source/Engine/Graphics/PixelFormat.h b/Source/Engine/Graphics/PixelFormat.h index 2eba8736d..5bd9429c0 100644 --- a/Source/Engine/Graphics/PixelFormat.h +++ b/Source/Engine/Graphics/PixelFormat.h @@ -563,6 +563,11 @@ API_ENUM() enum class PixelFormat : uint32 /// NV12 = 109, + /// + /// Basis Universal format (.basis file data). Uses supercompressed texture transcoding system. + /// + Basis = 110, + /// /// The maximum format value (for internal use only). 
/// diff --git a/Source/Engine/Graphics/PixelFormatExtensions.cpp b/Source/Engine/Graphics/PixelFormatExtensions.cpp index 648a1cd2a..9dee8675f 100644 --- a/Source/Engine/Graphics/PixelFormatExtensions.cpp +++ b/Source/Engine/Graphics/PixelFormatExtensions.cpp @@ -419,14 +419,21 @@ bool PixelFormatExtensions::IsHDR(const PixelFormat format) { switch (format) { - case PixelFormat::R11G11B10_Float: - case PixelFormat::R10G10B10A2_UNorm: - case PixelFormat::R16G16B16A16_Float: - case PixelFormat::R32G32B32A32_Float: - case PixelFormat::R16G16_Float: case PixelFormat::R16_Float: - case PixelFormat::BC6H_Sf16: + case PixelFormat::R16G16_Float: + case PixelFormat::R16G16B16A16_Float: + case PixelFormat::R32_Float: + case PixelFormat::R32G32_Float: + case PixelFormat::R32G32B32_Float: + case PixelFormat::R32G32B32A32_Float: + case PixelFormat::R11G11B10_Float: + case PixelFormat::R9G9B9E5_SharedExp: + case PixelFormat::BC6H_Typeless: case PixelFormat::BC6H_Uf16: + case PixelFormat::BC6H_Sf16: + case PixelFormat::BC7_Typeless: + case PixelFormat::BC7_UNorm: + case PixelFormat::BC7_UNorm_sRGB: return true; default: return false; diff --git a/Source/Engine/Graphics/PixelFormatExtensions.h b/Source/Engine/Graphics/PixelFormatExtensions.h index 27fc812ca..d707fd3bf 100644 --- a/Source/Engine/Graphics/PixelFormatExtensions.h +++ b/Source/Engine/Graphics/PixelFormatExtensions.h @@ -117,10 +117,10 @@ public: API_FUNCTION() static bool IsSRGB(PixelFormat format); /// - /// Determines whether the specified is HDR (either 16 or 32bits Float) + /// Determines whether the specified is High Dynamic Range (HDR) - can store color values larger than 1. /// /// The format. - /// true if the specified pixel format is HDR (Floating poInt); otherwise, false. + /// true if the specified pixel format is High Dynamic Range (HDR); otherwise, false. 
API_FUNCTION() static bool IsHDR(PixelFormat format); /// diff --git a/Source/Engine/Graphics/Textures/StreamingTexture.cpp b/Source/Engine/Graphics/Textures/StreamingTexture.cpp index 71d209301..8d8e65c95 100644 --- a/Source/Engine/Graphics/Textures/StreamingTexture.cpp +++ b/Source/Engine/Graphics/Textures/StreamingTexture.cpp @@ -9,6 +9,7 @@ #include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/Async/Tasks/GPUUploadTextureMipTask.h" #include "Engine/Scripting/Enums.h" +#include "Engine/Tools/TextureTool/TextureTool.h" TextureHeader_Deprecated::TextureHeader_Deprecated() { @@ -142,7 +143,10 @@ void StreamingTexture::UnloadTexture() uint64 StreamingTexture::GetTotalMemoryUsage() const { const uint64 arraySize = _header.IsCubeMap ? 6 : 1; - return RenderTools::CalculateTextureMemoryUsage(_header.Format, _header.Width, _header.Height, _header.MipLevels) * arraySize; + PixelFormat format = _header.Format; + if (_texture && _texture->IsAllocated()) + format = _texture->Format(); + return RenderTools::CalculateTextureMemoryUsage(format, _header.Width, _header.Height, _header.MipLevels) * arraySize; } String StreamingTexture::ToString() const @@ -294,13 +298,14 @@ Task* StreamingTexture::UpdateAllocation(int32 residency) GPUTextureDescription desc; if (IsCubeMap()) { - ASSERT(width == height); + ASSERT_LOW_LAYER(width == height); desc = GPUTextureDescription::NewCube(width, residency, _header.Format, GPUTextureFlags::ShaderResource); } else { desc = GPUTextureDescription::New2D(width, height, residency, _header.Format, GPUTextureFlags::ShaderResource); } + desc.Format = TextureTool::GetTextureFormat(_header.Type, desc.Format, desc.Width, desc.Height, _header.IsSRGB); // Setup texture if (texture->Init(desc)) @@ -367,16 +372,9 @@ protected: Result run(GPUTasksContext* context) override { const auto texture = _texture.Get(); - if (texture == nullptr) + if (texture == nullptr || !texture->IsAllocated()) return Result::MissingResources; - // Ensure that texture 
has been allocated before this task and has proper format - if (!texture->IsAllocated() || texture->Format() != _streamingTexture->GetHeader()->Format) - { - LOG(Error, "Cannot stream texture {0} (streaming format: {1})", texture->ToString(), ScriptingEnum::ToString(_streamingTexture->GetHeader()->Format)); - return Result::Failed; - } - // Get asset data BytesContainer data; const auto absoluteMipIndex = _streamingTexture->TextureMipIndexToTotalIndex(_mipIndex); @@ -390,14 +388,18 @@ protected: if (!_streamingTexture->GetOwner()->GetMipDataCustomPitch(absoluteMipIndex, rowPitch, slicePitch)) texture->ComputePitch(_mipIndex, rowPitch, slicePitch); _data.Link(data); - ASSERT(data.Length() >= (int32)slicePitch * arraySize); // Update all array slices const byte* dataSource = data.Get(); + int32 dataPerSlice = data.Length() / arraySize; // In most cases it's a slice pitch, except when using transcoding (eg. Basis), then each slice has to use the same amount of memory for (int32 arrayIndex = 0; arrayIndex < arraySize; arrayIndex++) { - context->GPU->UpdateTexture(texture, arrayIndex, _mipIndex, dataSource, rowPitch, slicePitch); - dataSource += slicePitch; + if (TextureTool::UpdateTexture(context->GPU, texture, arrayIndex, _mipIndex, Span(dataSource, dataPerSlice), rowPitch, slicePitch, _streamingTexture->GetHeader()->Format)) + { + LOG(Error, "Cannot stream mip {} of texture {} (format: {})", _mipIndex, texture->ToString(), ScriptingEnum::ToString(_streamingTexture->GetHeader()->Format)); + return Result::Failed; + } + dataSource += dataPerSlice; } return Result::Ok; diff --git a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp index 4a8d138ed..7a504c93f 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp @@ -9,7 +9,7 @@ // @formatter:off -VkFormat RenderToolsVulkan::PixelFormatToVkFormat[110] = +VkFormat 
RenderToolsVulkan::PixelFormatToVkFormat[111] = { VK_FORMAT_UNDEFINED, VK_FORMAT_R32G32B32A32_SFLOAT, @@ -121,6 +121,7 @@ VkFormat RenderToolsVulkan::PixelFormatToVkFormat[110] = VK_FORMAT_ASTC_10x10_SRGB_BLOCK, VK_FORMAT_G8B8G8R8_422_UNORM, // YUY2 VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, // NV12 + VK_FORMAT_UNDEFINED, // Basis }; VkBlendOp RenderToolsVulkan::OperationToVkBlendOp[6] = @@ -140,9 +141,9 @@ VkCompareOp RenderToolsVulkan::ComparisonFuncToVkCompareOp[9] = VK_COMPARE_OP_LESS, // Less VK_COMPARE_OP_EQUAL, // Equal VK_COMPARE_OP_LESS_OR_EQUAL, // LessEqual - VK_COMPARE_OP_GREATER, // Grather + VK_COMPARE_OP_GREATER, // Greater VK_COMPARE_OP_NOT_EQUAL, // NotEqual - VK_COMPARE_OP_GREATER_OR_EQUAL, // GratherEqual + VK_COMPARE_OP_GREATER_OR_EQUAL, // GreaterEqual VK_COMPARE_OP_ALWAYS, // Always }; diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp index 57f9023d8..7935b5a07 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp @@ -471,7 +471,9 @@ bool GPUDeviceWebGPU::Init() if (features.Contains(WGPUFeatureName_TextureCompressionBC)) { auto supportBc = + FormatSupport::Texture1D | FormatSupport::Texture2D | + FormatSupport::TextureCube | FormatSupport::ShaderLoad | FormatSupport::ShaderSample | FormatSupport::Mip; @@ -495,7 +497,9 @@ bool GPUDeviceWebGPU::Init() if (features.Contains(WGPUFeatureName_TextureCompressionASTC)) { auto supportAstc = + FormatSupport::Texture1D | FormatSupport::Texture2D | + FormatSupport::TextureCube | FormatSupport::ShaderLoad | FormatSupport::ShaderSample | FormatSupport::Mip; diff --git a/Source/Engine/Platform/Web/WebPlatformSettings.h b/Source/Engine/Platform/Web/WebPlatformSettings.h index 5acc3c6e3..09b1a9ce4 100644 --- a/Source/Engine/Platform/Web/WebPlatformSettings.h +++ b/Source/Engine/Platform/Web/WebPlatformSettings.h @@ -15,9 +15,9 @@ API_CLASS(Sealed, 
Namespace="FlaxEditor.Content.Settings") class FLAXENGINE_API API_AUTO_SERIALIZATION(); /// - /// The output textures quality (compression). + /// The output texture compression mode. /// - API_ENUM() enum class TextureQuality + API_ENUM() enum class TextureCompression { // Raw image data without any compression algorithm. Mostly for testing or compatibility. Uncompressed, @@ -25,13 +25,15 @@ API_CLASS(Sealed, Namespace="FlaxEditor.Content.Settings") class FLAXENGINE_API BC, // Converts compressed textures into ASTC format that is supported on Mobile but not on Desktop. ASTC, + // Converts compressed textures into Basis Universal format that is supercompressed - can be quickly decoded into GPU native format on demand (BC1-7 or ASTC). Supported on all platforms. + Basis, }; /// - /// The output textures quality (compression). + /// The output textures compression mode. /// API_FIELD(Attributes = "EditorOrder(500), EditorDisplay(\"General\")") - TextureQuality TexturesQuality = TextureQuality::Uncompressed; + TextureCompression TexturesCompression = TextureCompression::Basis; /// /// Gets the instance of the settings asset (default value if missing). Object returned by this method is always loaded with valid data to use. 
diff --git a/Source/Engine/Tools/TextureTool/TextureTool.Build.cs b/Source/Engine/Tools/TextureTool/TextureTool.Build.cs index bfb8fc0d7..c87a5b4ec 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.Build.cs +++ b/Source/Engine/Tools/TextureTool/TextureTool.Build.cs @@ -64,12 +64,18 @@ public class TextureTool : EngineModule { options.PrivateDependencies.Add("tinyexr"); } - if (options.Target.IsEditor && astc.IsSupported(options)) + if (options.Target.IsEditor && astc.Use(options)) { // ASTC for mobile (iOS and Android) options.SourceFiles.Add(Path.Combine(FolderPath, "TextureTool.astc.cpp")); options.PrivateDependencies.Add("astc"); } + if ((options.Target.IsEditor || options.Platform.Target == TargetPlatform.Web) && basis_universal.Use(options)) + { + // Universal texture compression for Web + options.SourceFiles.Add(Path.Combine(FolderPath, "TextureTool.basis_universal.cpp")); + options.PrivateDependencies.Add("basis_universal"); + } options.PublicDefinitions.Add("COMPILE_WITH_TEXTURE_TOOL"); } diff --git a/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp b/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp index cb758babd..3250242ec 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp @@ -1082,6 +1082,15 @@ bool TextureTool::ConvertDirectXTex(TextureData& dst, const TextureData& src, co return true; #endif } + if (dstFormat == PixelFormat::Basis) + { +#if COMPILE_WITH_BASISU + return ConvertBasisUniversal(dst, src); +#else + LOG(Error, "Missing Basis Universal texture format compression lib."); + return true; +#endif + } HRESULT result; DirectX::ScratchImage dstImage; diff --git a/Source/Engine/Tools/TextureTool/TextureTool.astc.cpp b/Source/Engine/Tools/TextureTool/TextureTool.astc.cpp index d51c5ccab..ce1085204 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.astc.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.astc.cpp @@ -11,7 +11,7 @@ 
#include "Engine/Profiler/ProfilerCPU.h" #include -bool TextureTool::ConvertAstc(TextureData& dst, const TextureData& src, const PixelFormat dstFormat) +bool TextureTool::ConvertAstc(TextureData& dst, const TextureData& src, PixelFormat dstFormat) { PROFILE_CPU(); ASSERT(PixelFormatExtensions::IsCompressedASTC(dstFormat)); @@ -21,7 +21,7 @@ bool TextureTool::ConvertAstc(TextureData& dst, const TextureData& src, const Pi // Configure the compressor run const bool isSRGB = PixelFormatExtensions::IsSRGB(dstFormat); const bool isHDR = PixelFormatExtensions::IsHDR(src.Format); - astcenc_profile astcProfile = isHDR ? ASTCENC_PRF_HDR_RGB_LDR_A : (isSRGB ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR); + astcenc_profile astcProfile = isHDR ? ASTCENC_PRF_HDR : (isSRGB ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR); float astcQuality = ASTCENC_PRE_MEDIUM; unsigned int astcFlags = 0; // TODO: add custom flags support for converter to handle ASTCENC_FLG_MAP_NORMAL astcenc_config astcConfig; diff --git a/Source/Engine/Tools/TextureTool/TextureTool.basis_universal.cpp b/Source/Engine/Tools/TextureTool/TextureTool.basis_universal.cpp new file mode 100644 index 000000000..79e7ded1e --- /dev/null +++ b/Source/Engine/Tools/TextureTool/TextureTool.basis_universal.cpp @@ -0,0 +1,323 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+ +#if COMPILE_WITH_TEXTURE_TOOL && COMPILE_WITH_BASISU + +#include "TextureTool.h" +#include "Engine/Core/Log.h" +#include "Engine/Scripting/Enums.h" +#include "Engine/Graphics/GPUContext.h" +#include "Engine/Graphics/Textures/TextureData.h" +#include "Engine/Graphics/PixelFormatExtensions.h" +#include "Engine/Graphics/RenderTools.h" +#include "Engine/Platform/CPUInfo.h" +#include "Engine/Profiler/ProfilerCPU.h" +#include +#if USE_EDITOR +#include +#endif + +bool TextureTool::ConvertBasisUniversal(TextureData& dst, const TextureData& src) +{ +#if USE_EDITOR + PROFILE_CPU(); + static bool Init = true; + if (Init) + { + Init = false; + basisu::basisu_encoder_init(); + LOG(Info, "Compressing with Basis Universal " BASISU_LIB_VERSION_STRING); + } + + // Decompress/convert source image + TextureData const* textureData = &src; + TextureData converted; + const PixelFormat inputFormat = PixelFormatExtensions::IsHDR(src.Format) ? PixelFormat::R32G32B32A32_Float : (PixelFormatExtensions::IsSRGB(src.Format) ? 
PixelFormat::R8G8B8A8_UNorm_sRGB : PixelFormat::R8G8B8A8_UNorm); + if (src.Format != inputFormat) + { + if (textureData != &src) + converted = src; + if (!TextureTool::Convert(converted, *textureData, inputFormat)) + textureData = &converted; + } + + // Initialize the compressor parameters + basisu::basis_compressor_params comp_params; + basist::basis_tex_format mode; + if (PixelFormatExtensions::IsHDR(src.Format)) + { + comp_params.m_source_images_hdr.resize(1); + mode = basist::basis_tex_format::cUASTC_HDR_4x4; + } + else + { + comp_params.m_source_images.resize(1); + switch (src.Format) + { + case PixelFormat::ASTC_4x4_UNorm: + case PixelFormat::ASTC_4x4_UNorm_sRGB: + mode = basist::basis_tex_format::cXUASTC_LDR_4x4; + break; + case PixelFormat::ASTC_6x6_UNorm: + case PixelFormat::ASTC_6x6_UNorm_sRGB: + mode = basist::basis_tex_format::cXUASTC_LDR_6x6; + break; + case PixelFormat::ASTC_8x8_UNorm: + case PixelFormat::ASTC_8x8_UNorm_sRGB: + mode = basist::basis_tex_format::cXUASTC_LDR_8x8; + break; + case PixelFormat::ASTC_10x10_UNorm: + case PixelFormat::ASTC_10x10_UNorm_sRGB: + mode = basist::basis_tex_format::cXUASTC_LDR_10x10; + break; + default: + mode = basist::basis_tex_format::cXUASTC_LDR_4x4; + break; + } + } + comp_params.set_format_mode_and_quality_effort(mode, 80); // TODO: expose quality somehow + comp_params.set_srgb_options(PixelFormatExtensions::IsSRGB(src.Format)); + if (textureData->Depth > 1) + comp_params.m_tex_type = basist::cBASISTexTypeVolume; + else + comp_params.m_tex_type = basist::cBASISTexType2D; + comp_params.m_use_opencl = false; + comp_params.m_write_output_basis_or_ktx2_files = false; + comp_params.m_create_ktx2_file = false; +#if !BASISD_SUPPORT_KTX2_ZSTD + comp_params.m_xuastc_ldr_syntax = (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith; +#endif + comp_params.m_print_stats = false; + comp_params.m_compute_stats = false; + comp_params.m_status_output = false; +#if 0 // Enable LinkAsConsoleProgram in FlaxEditor.Build.cs to 
see debug output in console + basisu::enable_debug_printf(true); + comp_params.m_debug = true; +#endif +#if 0 // Debug exporting of the images + comp_params.m_debug_images = true; + TextureTool::ExportTexture(TEXT("textureData.png"), *textureData); + TextureTool::ExportTexture(TEXT("src.png"), src); +#endif + + // Initialize a job pool + uint32_t num_threads = Math::Min(8u, Platform::GetCPUInfo().ProcessorCoreCount); // Incl. calling thread + basisu::job_pool job_pool(num_threads); + comp_params.m_pJob_pool = &job_pool; + comp_params.m_multithreading = num_threads > 1; + + // Setup output + dst.Items.Resize(textureData->Items.Count()); + dst.Width = textureData->Width; + dst.Height = textureData->Height; + dst.Depth = textureData->Depth; + dst.Format = PixelFormat::Basis; + + // Compress all array slices + for (int32 arrayIndex = 0; arrayIndex < textureData->Items.Count(); arrayIndex++) + { + const auto& srcSlice = textureData->Items[arrayIndex]; + auto& dstSlice = dst.Items[arrayIndex]; + auto mipLevels = srcSlice.Mips.Count(); + dstSlice.Mips.Resize(mipLevels, false); + + // Compress all mip levels + for (int32 mipIndex = 0; mipIndex < mipLevels; mipIndex++) + { + const auto& srcMip = srcSlice.Mips[mipIndex]; + auto& dstMip = dstSlice.Mips[mipIndex]; + auto mipWidth = Math::Max(textureData->Width >> mipIndex, 1); + auto mipHeight = Math::Max(textureData->Height >> mipIndex, 1); + + // Provide source texture data + if (PixelFormatExtensions::IsHDR(textureData->Format)) + { + ASSERT(textureData->Format == PixelFormat::R32G32B32A32_Float); + auto& img = comp_params.m_source_images_hdr[0]; + img.crop(mipWidth, mipHeight, mipWidth * sizeof(basisu::vec4F)); + for (int32 y = 0; y < mipHeight; y++) + Platform::MemoryCopy(&img.get_ptr()[y * img.get_pitch()], &srcMip.Get(0, y), img.get_pitch()); + } + else + { + ASSERT(textureData->Format == PixelFormat::R8G8B8A8_UNorm || textureData->Format == PixelFormat::R8G8B8A8_UNorm_sRGB); + ASSERT(srcMip.RowPitch == mipWidth * 
sizeof(uint32)); + ASSERT(srcMip.Lines == mipHeight); + ASSERT(srcMip.DepthPitch == mipWidth * mipHeight * sizeof(uint32)); + auto& img = comp_params.m_source_images[0]; + img.init(srcMip.Data.Get(), mipWidth, mipHeight, 4); + if (src.Format == PixelFormat::BC5_Typeless || src.Format == PixelFormat::BC5_UNorm || src.Format == PixelFormat::R16G16_UNorm) + { + // BC5 blocks are exported from Basis using Red and Alpha channels while engine uses Red and Green channels + for (int32_t y = 0; y < mipHeight; y++) + { + for (int32_t x = 0; x < mipWidth; x++) + { + basisu::color_rgba& pixel = img(x, y); + pixel.a = pixel.g; + } + } + } + } + + // Compress image + basisu::basis_compressor comp; + { + PROFILE_CPU_NAMED("basis_compressor"); + if (!comp.init(comp_params)) + { + LOG(Error, "basis_compressor::init() failed"); + return true; + } + basisu::basis_compressor::error_code error = comp.process(); + if (error != basisu::basis_compressor::cECSuccess) + { + LOG(Error, "basis_compressor::process() failed with error code {}", (uint32)error); + return true; + } + } + + // Copy output data + { + PROFILE_CPU_NAMED("Copy"); + const basisu::uint8_vec& basisFile = comp.get_output_basis_file(); + dstMip.DepthPitch = dstMip.RowPitch = (int32)basisFile.size(); + dstMip.Lines = 1; + dstMip.Data.Copy(basisFile.data(), (int32)basisFile.size()); + } + } + } + + return false; +#else + LOG(Error, "Building Basis Universal textures is not supported on this platform."); + return true; +#endif +} + +bool TextureTool::UpdateTextureBasisUniversal(GPUContext* context, GPUTexture* texture, int32 arrayIndex, int32 mipIndex, Span data, uint32 rowPitch, uint32 slicePitch, PixelFormat dataFormat) +{ + PROFILE_CPU_NAMED("Basis Universal"); + PROFILE_MEM(GraphicsTextures); + static bool Init = true; + if (Init) + { + Init = false; + basist::basisu_transcoder_init(); + } + + // Open Basis file + basist::basisu_transcoder transcoder; + basist::basisu_file_info fileInfo; + basist::basisu_image_info 
imageInfo; + if (!transcoder.get_file_info(data.Get(), data.Length(), fileInfo) || !transcoder.get_image_info(data.Get(), data.Length(), imageInfo, 0)) + { + LOG(Error, "Failed to read Basis file"); + return true; + } + if (!transcoder.start_transcoding(data.Get(), data.Length())) + { + LOG(Error, "Failed to start transcoding Basis file"); + return true; + } + const int32 mipWidth = Math::Max(texture->Width() >> mipIndex, 1); + const int32 mipHeight = Math::Max(texture->Height() >> mipIndex, 1); + if (mipWidth != imageInfo.m_orig_width || mipHeight != imageInfo.m_orig_height) + { + LOG(Error, "Failed to transcode Basis file of size {}x{} into texture of size {}x{}", imageInfo.m_orig_width, imageInfo.m_orig_height, texture->Width(), texture->Height()); + return true; + } + basist::transcoder_texture_format transcoder_tex_fmt; + PixelFormat textureFormat = texture->Format(); + switch (textureFormat) + { + case PixelFormat::BC1_Typeless: + case PixelFormat::BC1_UNorm: + case PixelFormat::BC1_UNorm_sRGB: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFBC1_RGB; + break; + case PixelFormat::BC3_Typeless: + case PixelFormat::BC3_UNorm: + case PixelFormat::BC3_UNorm_sRGB: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFBC3_RGBA; + break; + case PixelFormat::BC4_Typeless: + case PixelFormat::BC4_UNorm: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFBC4_R; + break; + case PixelFormat::BC5_Typeless: + case PixelFormat::BC5_UNorm: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFBC5_RG; + break; + case PixelFormat::BC6H_Typeless: + case PixelFormat::BC6H_Uf16: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFBC6H; + break; + case PixelFormat::BC7_Typeless: + case PixelFormat::BC7_UNorm: + case PixelFormat::BC7_UNorm_sRGB: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFBC7_RGBA; + break; + case PixelFormat::ASTC_4x4_UNorm: + case PixelFormat::ASTC_4x4_UNorm_sRGB: + transcoder_tex_fmt = 
basist::transcoder_texture_format::cTFASTC_LDR_4x4_RGBA; + break; + case PixelFormat::ASTC_6x6_UNorm: + case PixelFormat::ASTC_6x6_UNorm_sRGB: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFASTC_LDR_6x6_RGBA; + break; + case PixelFormat::ASTC_8x8_UNorm: + case PixelFormat::ASTC_8x8_UNorm_sRGB: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFASTC_LDR_8x8_RGBA; + break; + case PixelFormat::ASTC_10x10_UNorm: + case PixelFormat::ASTC_10x10_UNorm_sRGB: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFASTC_LDR_10x10_RGBA; + break; + case PixelFormat::R8G8B8A8_UNorm: + case PixelFormat::R8G8B8A8_UNorm_sRGB: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFRGBA32; + break; + case PixelFormat::B5G6R5_UNorm: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFBGR565; + break; + case PixelFormat::R16G16B16A16_Typeless: + case PixelFormat::R16G16B16A16_Float: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFRGBA_HALF; + break; + case PixelFormat::R9G9B9E5_SharedExp: + transcoder_tex_fmt = basist::transcoder_texture_format::cTFRGB_9E5; + break; + default: + LOG(Error, "Unsupported texture format {} to transcode Basis from {}", ScriptingEnum::ToString(textureFormat), String(basist::basis_get_tex_format_name(fileInfo.m_tex_format))); + return true; + } + if (!basist::basis_is_format_supported(transcoder_tex_fmt, fileInfo.m_tex_format)) + { + LOG(Error, "Basis library doesn't support transcoding texture format {} into {}", String(basist::basis_get_tex_format_name(fileInfo.m_tex_format)), String(basist::basis_get_format_name(transcoder_tex_fmt))); + return true; + } + + // Allocate memory + // TODO: use some transient frame allocator for that + Array transcodedData; + const bool isBlockUncompressed = basist::basis_transcoder_format_is_uncompressed(transcoder_tex_fmt); + uint32 transcodedSize = basist::basis_compute_transcoded_image_size_in_bytes(transcoder_tex_fmt, imageInfo.m_orig_width, 
imageInfo.m_orig_height); + uint32 transcodedBlocksOrPixels = isBlockUncompressed ? imageInfo.m_orig_width * imageInfo.m_orig_height : imageInfo.m_total_blocks; + transcodedData.Resize(transcodedSize); + + // Transcode to the GPU format + uint32 decodeFlags = 0; // basist::basisu_decode_flags + if (!transcoder.transcode_image_level(data.Get(), data.Length(), 0, 0, transcodedData.Get(), transcodedBlocksOrPixels, transcoder_tex_fmt, decodeFlags)) + { + LOG(Error, "Failed to transcode Basis image of size {}x{} from format {} to {} ({})", imageInfo.m_orig_width, imageInfo.m_orig_height, String(basist::basis_get_tex_format_name(fileInfo.m_tex_format)), String(basist::basis_get_format_name(transcoder_tex_fmt)), ScriptingEnum::ToString(textureFormat)); + return true; + } + + // Send data to the GPU + slicePitch = transcodedData.Count(); + rowPitch = slicePitch / (isBlockUncompressed ? imageInfo.m_orig_height : imageInfo.m_num_blocks_y); + context->UpdateTexture(texture, arrayIndex, mipIndex, transcodedData.Get(), rowPitch, slicePitch); + return false; +} + +#endif diff --git a/Source/Engine/Tools/TextureTool/TextureTool.cpp b/Source/Engine/Tools/TextureTool/TextureTool.cpp index b892385da..7bb00e290 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.cpp @@ -11,6 +11,8 @@ #include "Engine/Serialization/JsonWriter.h" #include "Engine/Serialization/JsonTools.h" #include "Engine/Scripting/Enums.h" +#include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/PixelFormatSampler.h" #include "Engine/Graphics/Textures/TextureData.h" #include "Engine/Profiler/ProfilerCPU.h" @@ -391,6 +393,91 @@ bool TextureTool::Resize(TextureData& dst, const TextureData& src, int32 dstWidt #endif } +bool TextureTool::UpdateTexture(GPUContext* context, GPUTexture* texture, int32 arrayIndex, int32 mipIndex, Span data, uint32 rowPitch, uint32 slicePitch, PixelFormat dataFormat) +{ + PixelFormat 
textureFormat = texture->Format(); + + // Basis Universal data transcoded into the runtime GPU format (supercompressed texture) + if (dataFormat == PixelFormat::Basis) + { +#if COMPILE_WITH_BASISU + return UpdateTextureBasisUniversal(context, texture, arrayIndex, mipIndex, data, rowPitch, slicePitch, dataFormat); +#else + LOG(Error, "Loading Basis Universal textures is not supported on this platform."); +#endif + } + + // Update texture (if format matches) + if (textureFormat == dataFormat) + { + context->UpdateTexture(texture, arrayIndex, mipIndex, data.Get(), rowPitch, slicePitch); + } + return textureFormat != dataFormat; +} + +PixelFormat TextureTool::GetTextureFormat(TextureFormatType textureType, PixelFormat dataFormat, int32 width, int32 height, bool sRGB) +{ +#define CHECK_BLOCK_SIZE(x, y) (width % x == 0 && height % y == 0) + + // Basis Universal data transcoded into the runtime GPU format (supercompressed texture) + if (dataFormat == PixelFormat::Basis) + { + ASSERT(GPUDevice::Instance); + auto minSupport = FormatSupport::Texture2D | FormatSupport::ShaderSample | FormatSupport::Mip; + + // Check ASTC formats + if (EnumHasAllFlags(GPUDevice::Instance->GetFormatFeatures(PixelFormat::ASTC_4x4_UNorm).Support, minSupport) && CHECK_BLOCK_SIZE(4, 4)) + return sRGB ? PixelFormat::ASTC_4x4_UNorm_sRGB : PixelFormat::ASTC_4x4_UNorm; + if (EnumHasAllFlags(GPUDevice::Instance->GetFormatFeatures(PixelFormat::ASTC_6x6_UNorm).Support, minSupport) && CHECK_BLOCK_SIZE(6, 6)) + return sRGB ? PixelFormat::ASTC_6x6_UNorm_sRGB : PixelFormat::ASTC_6x6_UNorm; + if (EnumHasAllFlags(GPUDevice::Instance->GetFormatFeatures(PixelFormat::ASTC_8x8_UNorm).Support, minSupport) && CHECK_BLOCK_SIZE(8, 8)) + return sRGB ? PixelFormat::ASTC_8x8_UNorm_sRGB : PixelFormat::ASTC_8x8_UNorm; + if (EnumHasAllFlags(GPUDevice::Instance->GetFormatFeatures(PixelFormat::ASTC_10x10_UNorm).Support, minSupport) && CHECK_BLOCK_SIZE(10, 10)) + return sRGB ? 
PixelFormat::ASTC_10x10_UNorm_sRGB : PixelFormat::ASTC_10x10_UNorm; + + // Check BCn formats + if (EnumHasAllFlags(GPUDevice::Instance->GetFormatFeatures(PixelFormat::BC3_UNorm).Support, minSupport) && CHECK_BLOCK_SIZE(4, 4)) + { + switch (textureType) + { + case TextureFormatType::ColorRGB: + return sRGB ? PixelFormat::BC1_UNorm_sRGB : PixelFormat::BC1_UNorm; + case TextureFormatType::ColorRGBA: + return sRGB ? PixelFormat::BC3_UNorm_sRGB : PixelFormat::BC3_UNorm; + case TextureFormatType::NormalMap: + return PixelFormat::BC5_UNorm; + case TextureFormatType::GrayScale: + return PixelFormat::BC4_UNorm; + // Basis Universal doesn't support alpha in BC7 (and it can be loaded only from LDR formats) + /*case TextureFormatType::HdrRGBA: + return PixelFormat::BC7_UNorm;*/ + case TextureFormatType::HdrRGB: + return PixelFormat::BC6H_Uf16; + } + } + + // Use raw uncompressed as fallback + switch (textureType) + { + case TextureFormatType::ColorRGB: + case TextureFormatType::ColorRGBA: + return sRGB ? 
PixelFormat::R8G8B8A8_UNorm_sRGB : PixelFormat::R8G8B8A8_UNorm; + case TextureFormatType::NormalMap: + return PixelFormat::R8G8B8A8_UNorm; + case TextureFormatType::GrayScale: + return PixelFormat::R8G8B8A8_UNorm; + case TextureFormatType::HdrRGBA: + case TextureFormatType::HdrRGB: + return PixelFormat::R16G16B16A16_Float; + default: + return PixelFormat::Unknown; + } + } + +#undef CHECK_BLOCK_SIZE + return dataFormat; +} + PixelFormat TextureTool::ToPixelFormat(TextureFormatType format, int32 width, int32 height, bool canCompress) { const bool canUseBlockCompression = width % 4 == 0 && height % 4 == 0; diff --git a/Source/Engine/Tools/TextureTool/TextureTool.h b/Source/Engine/Tools/TextureTool/TextureTool.h index 53f6c13a0..5d1160d39 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.h +++ b/Source/Engine/Tools/TextureTool/TextureTool.h @@ -139,7 +139,7 @@ API_CLASS(Namespace="FlaxEngine.Tools", Static) class FLAXENGINE_API TextureTool public: #if USE_EDITOR /// - /// Checks whenever the given texture file contains alpha channel data with values different than solid fill of 1 (non fully opaque). + /// Checks whether the given texture file contains alpha channel data with values different from solid fill of 1 (non fully opaque). /// /// The file path. /// True if has alpha channel, otherwise false. @@ -173,7 +173,7 @@ public: static bool ExportTexture(const StringView& path, const TextureData& textureData); /// - /// Converts the specified source texture data into an another format. + /// Converts the specified source texture data into another format. /// /// The destination data. /// The source data. @@ -182,7 +182,7 @@ public: static bool Convert(TextureData& dst, const TextureData& src, const PixelFormat dstFormat); /// - /// Resizes the specified source texture data into an another dimensions. + /// Resizes the specified source texture data into another dimensions. /// /// The destination data. /// The source data. 
@@ -191,8 +191,32 @@ public: /// True if fails, otherwise false. static bool Resize(TextureData& dst, const TextureData& src, int32 dstWidth, int32 dstHeight); + /// + /// Updates the texture data. Supports transcoding source data (e.g. Basis). + /// + /// The GPU context. + /// The destination GPU texture. It has to be allocated. + /// The destination surface index in the texture array. + /// The absolute index of the mip map to update. + /// The buffer with the texture data. + /// The row pitch (in bytes) of the input data. + /// The slice pitch (in bytes) of the input data. + /// The format of the data. If different from the GPU texture format then runtime conversion might happen. + static bool UpdateTexture(GPUContext* context, GPUTexture* texture, int32 arrayIndex, int32 mipIndex, Span data, uint32 rowPitch, uint32 slicePitch, PixelFormat dataFormat); + + /// + /// Calculates the runtime format for the texture pixels based on the texture type and input texture data (format and size). Checks the current GPUDevice formats support. + /// + /// Type of the texture that hints its usage. + /// The existing texture data format (ideally to preserve to avoid conversions). + /// Width of the texture (in pixels). + /// Height of the texture (in pixels). + /// Indicates that texture data is stored in sRGB color space. + /// Resolved pixel format for the GPU Texture. 
+ static PixelFormat GetTextureFormat(TextureFormatType textureType, PixelFormat dataFormat, int32 width, int32 height, bool sRGB); + public: - static PixelFormat ToPixelFormat(TextureFormatType format, int32 width, int32 height, bool canCompress); + static PixelFormat ToPixelFormat(TextureFormatType format, int32 width, int32 height, bool canCompress = true); private: enum class ImageType @@ -229,7 +253,11 @@ private: static bool ResizeStb(TextureData& dst, const TextureData& src, int32 dstWidth, int32 dstHeight); #endif #if COMPILE_WITH_ASTC - static bool ConvertAstc(TextureData& dst, const TextureData& src, const PixelFormat dstFormat); + static bool ConvertAstc(TextureData& dst, const TextureData& src, PixelFormat dstFormat); +#endif +#if COMPILE_WITH_BASISU + static bool ConvertBasisUniversal(TextureData& dst, const TextureData& src); + static bool UpdateTextureBasisUniversal(GPUContext* context, GPUTexture* texture, int32 arrayIndex, int32 mipIndex, Span data, uint32 rowPitch, uint32 slicePitch, PixelFormat dataFormat); #endif }; diff --git a/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp b/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp index 99ee78780..fd07c59f4 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp @@ -758,6 +758,17 @@ bool TextureTool::ConvertStb(TextureData& dst, const TextureData& src, const Pix if (ConvertAstc(dst, *textureData, dstFormat)) #else LOG(Error, "Missing ASTC texture format compression lib."); +#endif + { + return true; + } + } + else if (dstFormat == PixelFormat::Basis) + { +#if COMPILE_WITH_BASISU + if (ConvertBasisUniversal(dst, *textureData)) +#else + LOG(Error, "Missing Basis Universal texture format compression lib."); #endif { return true; diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/basisu_encoder.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/basisu_encoder.lib new file mode 100644 index 000000000..764c65b15 --- 
/dev/null +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/basisu_encoder.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4598efd319057ff5812cc4baf3d04bcc9a2c37eed4ab2ea897b9ac7fdddac9cb +size 6940672 diff --git a/Source/ThirdParty/astc/astc.Build.cs b/Source/ThirdParty/astc/astc.Build.cs index 55eb18955..dc5177b03 100644 --- a/Source/ThirdParty/astc/astc.Build.cs +++ b/Source/ThirdParty/astc/astc.Build.cs @@ -11,7 +11,7 @@ public class astc : DepsModule /// /// Returns true if can use astc lib for a given build setup. /// - public static bool IsSupported(BuildOptions options) + public static bool Use(BuildOptions options) { switch (options.Platform.Target) { diff --git a/Source/ThirdParty/basis_universal/LICENSE b/Source/ThirdParty/basis_universal/LICENSE new file mode 100644 index 000000000..94ea88043 --- /dev/null +++ b/Source/ThirdParty/basis_universal/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2019-2026 Binomial LLC + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Source/ThirdParty/basis_universal/basis_universal.Build.cs b/Source/ThirdParty/basis_universal/basis_universal.Build.cs new file mode 100644 index 000000000..770851628 --- /dev/null +++ b/Source/ThirdParty/basis_universal/basis_universal.Build.cs @@ -0,0 +1,46 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using Flax.Build; +using Flax.Build.NativeCpp; + +/// +/// https://github.com/BinomialLLC/basis_universal +/// +public class basis_universal : DepsModule +{ + /// + /// Returns true if can use basis_universal lib for a given build setup. 
+ /// + public static bool Use(BuildOptions options) + { + switch (options.Platform.Target) + { + case TargetPlatform.Windows: + case TargetPlatform.Web: + return true; + default: + return false; + } + } + + /// + public override void Init() + { + base.Init(); + + LicenseType = LicenseTypes.Apache2; + LicenseFilePath = "LICENSE"; + + // Merge third-party modules into engine binary + BinaryModuleName = "FlaxEngine"; + } + + /// + public override void Setup(BuildOptions options) + { + base.Setup(options); + + options.PublicDefinitions.Add("COMPILE_WITH_BASISU"); + AddLib(options, options.DepsFolder, "basisu_encoder"); + } +} diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h b/Source/ThirdParty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h new file mode 100644 index 000000000..bcc4bfd2b --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h @@ -0,0 +1,132 @@ +// File: basisu_astc_hdr_6x6_enc.h +#pragma once +#include "basisu_enc.h" +#include "../transcoder/basisu_astc_hdr_core.h" + +namespace astc_6x6_hdr +{ + const uint32_t ASTC_HDR_6X6_DEF_USER_COMP_LEVEL = 2; + const uint32_t ASTC_HDR_6X6_MAX_USER_COMP_LEVEL = 12; + + const uint32_t ASTC_HDR_6X6_MAX_COMP_LEVEL = 4; + + const float LDR_BLACK_BIAS = 0.0f;// .49f; + + // Note: This struct is copied several times, so do not place any heavyweight objects in here. + struct astc_hdr_6x6_global_config + { + // Important: The Delta ITP colorspace error metric we use internally makes several assumptions about the nature of the HDR RGB inputs supplied to the encoder. + // This encoder computes colorspace error in the ICtCp (or more accurately the delta ITP, where CT is scaled by .5 vs. ICtCp to become T) colorspace, so getting this correct is important. 
+ // By default the encoder assumes the input is in absolute luminance (in nits or candela per square meter, cd/m^2), specified as positive-only linear light RGB, using the REC 709 colorspace gamut (but NOT the sRGB transfer function, i.e. linear light). + // If the m_rec2020_bt2100_color_gamut flag is true, the input colorspace is treated as REC 2020/BT.2100 (which is wider than 709). + // For SDR/LDR->HDR upconversion, the REC 709 sRGB input should be converted to linear light (sRGB->linear) and the resulting normalized linear RGB values scaled by either 80 or 100 nits (the luminance of a typical SDR monitor). + // SDR upconversion to normalized [0,1] (i.e. non-absolute) luminances may work but is not supported because ITP errors will not be predicted correctly. + // 11/3/2025: This flag is always copied straight into the output KTX2 DFD colorspace, even for non-HDR formats. + // TODO: Move this parameter to reflect this. + bool m_rec2020_bt2100_color_gamut = false; + + // levels 0-3 normal levels, 4=exhaustive + uint32_t m_master_comp_level = 0; + uint32_t m_highest_comp_level = 1; + + float m_lambda = 0.0f; + + bool m_extra_patterns_flag = false; // def to false, works in comp levels [1,4] + bool m_brute_force_partition_matching = false; // def to false + + bool m_jnd_optimization = false; // defaults to false for HDR inputs, on SDR upconverted images this can default to enabled + float m_jnd_delta_itp_thresh = .75f; + + bool m_force_one_strip = false; + + bool m_gaussian1_fallback = true; // def to true, if this is disabled m_gaussian2_fallback should be disabled too + float m_gaussian1_strength = 1.45f; + + bool m_gaussian2_fallback = true; // def to true, hopefully rarely kicks in + float m_gaussian2_strength = 1.83f; + + // m_disable_delta_endpoint_usage may give a slight increase in RDO ASTC encoding efficiency. It's also faster. 
+ bool m_disable_delta_endpoint_usage = false; + + // Scale up Delta ITP errors for very dark pixels, assuming they will be brightly exposed > 1.0x. + // We don't know if the output will be exposed, or not. If heavily exposed, our JND calculations will not be conservative enough. + bool m_delta_itp_dark_adjustment = true; + + bool m_debug_images = false; + std::string m_debug_image_prefix = "dbg_astc_hdr_6x6_devel_"; + + bool m_output_images = false; + std::string m_output_image_prefix = "dbg_astc_hdr_6x6_output_"; + + bool m_debug_output = false; + bool m_image_stats = false; + bool m_status_output = false; + + //------------------------------------------------------------------------------------- + // Very low level/devel parameters - intended for development. Best not to change them. + //------------------------------------------------------------------------------------- + bool m_deblocking_flag = true; + float m_deblock_penalty_weight = .03f; + bool m_disable_twothree_subsets = false; // def to false + bool m_use_solid_blocks = true; // def to true + bool m_use_runs = true; // def to true + bool m_block_stat_optimizations_flag = true; // def to true + + bool m_rdo_candidate_diversity_boost = true; // def to true + float m_rdo_candidate_diversity_boost_bit_window_weight = 1.2f; + + bool m_favor_higher_compression = true; // utilize all modes + uint32_t m_num_reuse_xy_deltas = basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS; + + void print() const + { + basisu::fmt_debug_printf("m_master_comp_level: {}, m_highest_comp_level: {}\n", m_master_comp_level, m_highest_comp_level); + basisu::fmt_debug_printf("m_lambda: {}\n", m_lambda); + basisu::fmt_debug_printf("m_rec2020_bt2100_color_gamut: {}\n", m_rec2020_bt2100_color_gamut); + basisu::fmt_debug_printf("m_extra_patterns_flag: {}, m_brute_force_partition_matching: {}\n", m_extra_patterns_flag, m_brute_force_partition_matching); + basisu::fmt_debug_printf("m_jnd_optimization: {}, m_jnd_delta_itp_thresh: {}\n", 
m_jnd_optimization, m_jnd_delta_itp_thresh); + basisu::fmt_debug_printf("m_force_one_strip: {}\n", m_force_one_strip); + basisu::fmt_debug_printf("m_gaussian1_fallback: {}, m_gaussian1_strength: {}\n", m_gaussian1_fallback, m_gaussian1_strength); + basisu::fmt_debug_printf("m_gaussian2_fallback: {}, m_gaussian2_strength: {}\n", m_gaussian2_fallback, m_gaussian2_strength); + basisu::fmt_debug_printf("m_disable_delta_endpoint_usage: {}\n", m_disable_delta_endpoint_usage); + basisu::fmt_debug_printf("m_delta_itp_dark_adjustment: {}\n", m_delta_itp_dark_adjustment); + basisu::fmt_debug_printf("m_debug_images: {}, m_debug_image_prefix: {}\n", m_debug_images, m_debug_image_prefix); + basisu::fmt_debug_printf("m_output_images: {}, m_output_image_prefix: {}\n", m_output_images, m_output_image_prefix); + basisu::fmt_debug_printf("m_image_stats: {}, m_status_output: {}\n", m_image_stats, m_status_output); + basisu::fmt_debug_printf("m_deblocking_flag: {}, m_deblock_penalty_weight: {}\n", m_deblocking_flag, m_deblock_penalty_weight); + basisu::fmt_debug_printf("m_disable_twothree_subsets: {}, m_use_solid_blocks: {}\n", m_disable_twothree_subsets, m_use_solid_blocks); + basisu::fmt_debug_printf("m_use_runs: {}, m_block_stat_optimizations_flag: {}\n", m_use_runs, m_block_stat_optimizations_flag); + basisu::fmt_debug_printf("m_rdo_candidate_diversity_boost: {}, m_rdo_candidate_diversity_boost_bit_window_weight: {}\n", m_rdo_candidate_diversity_boost, m_rdo_candidate_diversity_boost_bit_window_weight); + basisu::fmt_debug_printf("m_favor_higher_compression: {}, m_num_reuse_xy_deltas: {}\n", m_favor_higher_compression, m_num_reuse_xy_deltas); + } + + astc_hdr_6x6_global_config() + { + } + + void clear() + { + astc_hdr_6x6_global_config def; + std::swap(*this, def); + } + + // Max level is ASTC_HDR_6X6_MAX_USER_COMP_LEVEL + void set_user_level(int level); + }; + + void global_init(); + + struct result_metrics + { + basisu::image_metrics m_im_astc_log2; + basisu::image_metrics 
m_im_astc_half; + + basisu::image_metrics m_im_bc6h_log2; + basisu::image_metrics m_im_bc6h_half; + }; + + // The input image should be unpadded to 6x6 boundaries, i.e. the original unexpanded image. + bool compress_photo(const basisu::imagef& orig_src_img, const astc_hdr_6x6_global_config& global_cfg, basisu::job_pool* pJob_pool, + basisu::uint8_vec& intermediate_tex_data, basisu::uint8_vec& astc_tex_data, result_metrics& metrics); + +} // namespace uastc_6x6_hdr diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_astc_hdr_common.h b/Source/ThirdParty/basis_universal/encoder/basisu_astc_hdr_common.h new file mode 100644 index 000000000..e01999248 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_astc_hdr_common.h @@ -0,0 +1,433 @@ +// File: basisu_astc_hdr_common.h +#pragma once +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_astc_helpers.h" +#include "../transcoder/basisu_astc_hdr_core.h" + +namespace basisu +{ + const uint32_t MAX_ASTC_HDR_BLOCK_W = 6, MAX_ASTC_HDR_BLOCK_H = 6; + const uint32_t MAX_ASTC_HDR_ENC_BLOCK_PIXELS = 6 * 6; + + const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec) + const uint32_t MODE7_TOTAL_SUBMODES = 6; + + // [ise_range][0] = # levels + // [ise_range][1...] 
= lerp value [0,64] + // in ASTC order + // Supported ISE weight ranges: 0 to 11, 12 total + const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_2_LEVELS; // ISE 0=2 levels + const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=32 levels + const uint32_t MIN_SUPPORTED_WEIGHT_LEVELS = 2; + const uint32_t MAX_SUPPORTED_WEIGHT_LEVELS = 32; + + extern const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33]; + + const float Q_LOG_BIAS_4x4 = .125f; // the original UASTC HDR 4x4 log bias + const float Q_LOG_BIAS_6x6 = 1.0f; // the log bias both encoders use now + + const float LDR_TO_HDR_NITS = 100.0f; + + extern vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; + extern uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index + extern uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index + + struct astc_hdr_codec_base_options + { + float m_r_err_scale, m_g_err_scale; + float m_q_log_bias; + + bool m_ultra_quant; + + // If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however. 
+ bool m_allow_uber_mode; + + bool m_mode7_full_s_optimization; + + bool m_take_first_non_clamping_mode11_submode; + bool m_take_first_non_clamping_mode7_submode; + + bool m_disable_weight_plane_optimization; + + astc_hdr_codec_base_options() { init(); } + + void init(); + }; + + inline int get_bit( + int src_val, int src_bit) + { + assert(src_bit >= 0 && src_bit <= 31); + int bit = (src_val >> src_bit) & 1; + return bit; + } + + inline void pack_bit( + int& dst, int dst_bit, + int src_val, int src_bit = 0) + { + assert(dst_bit >= 0 && dst_bit <= 31); + int bit = get_bit(src_val, src_bit); + dst |= (bit << dst_bit); + } + + inline uint32_t get_max_qlog(uint32_t bits) + { + switch (bits) + { + case 7: return basist::MAX_QLOG7; + case 8: return basist::MAX_QLOG8; + case 9: return basist::MAX_QLOG9; + case 10: return basist::MAX_QLOG10; + case 11: return basist::MAX_QLOG11; + case 12: return basist::MAX_QLOG12; + case 16: return basist::MAX_QLOG16; + default: assert(0); break; + } + return 0; + } + +#if 0 + inline float get_max_qlog_val(uint32_t bits) + { + switch (bits) + { + case 7: return MAX_QLOG7_VAL; + case 8: return MAX_QLOG8_VAL; + case 9: return MAX_QLOG9_VAL; + case 10: return MAX_QLOG10_VAL; + case 11: return MAX_QLOG11_VAL; + case 12: return MAX_QLOG12_VAL; + case 16: return MAX_QLOG16_VAL; + default: assert(0); break; + } + return 0; + } +#endif + +#if 0 + // Input is the low 11 bits of the qlog + // Returns the 10-bit mantissa of the half float value + int qlog11_to_half_float_mantissa(int M) + { + assert(M <= 0x7FF); + int Mt; + if (M < 512) + Mt = 3 * M; + else if (M >= 1536) + Mt = 5 * M - 2048; + else + Mt = 4 * M - 512; + return (Mt >> 3); + } +#endif + + // Input is the 10-bit mantissa of the half float value + // Output is the 11-bit qlog value + // Inverse of qlog11_to_half_float_mantissa() + inline int half_float_mantissa_to_qlog11(int hf) + { + int q0 = (hf * 8 + 2) / 3; + int q1 = (hf * 8 + 2048 + 4) / 5; + + if (q0 < 512) + return q0; + else 
if (q1 >= 1536) + return q1; + + int q2 = (hf * 8 + 512 + 2) / 4; + return q2; + } + + inline int half_to_qlog16(int hf) + { + assert(!basist::half_is_signed((basist::half_float)hf) && !basist::is_half_inf_or_nan((basist::half_float)hf)); + + // extract 5 bits exponent, which is carried through to qlog16 unchanged + const int exp = (hf >> 10) & 0x1F; + + // extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless) + const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF); + assert(mantissa <= 0x7FF); + + // Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights. + uint32_t qlog16 = (exp << 11) | mantissa; + + // should be a lossless operation + assert(astc_helpers::qlog16_to_half(qlog16) == hf); + + return qlog16; + } + + void interpolate_qlog12_colors( + const int e[2][3], + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range); + + bool get_astc_hdr_mode_11_block_colors( + const uint8_t* pEndpoints, + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range); + + bool get_astc_hdr_mode_7_block_colors( + const uint8_t* pEndpoints, + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range); + + // Fast high precision piecewise linear approximation of log2(bias+x). + // Half may be zero, positive or denormal. No NaN/Inf/negative. 
+ BASISU_FORCE_INLINE double q(basist::half_float x, float log_bias) + { + union { float f; int32_t i; uint32_t u; } fi; + + fi.f = fast_half_to_float_pos_not_inf_or_nan(x); + + assert(fi.f >= 0.0f); + + fi.f += log_bias; + + return (double)fi.u; // approx log2f(fi.f), need to return double for the precision + } + + BASISU_FORCE_INLINE uint32_t q2(basist::half_float x, float log_bias) + { + union { float f; int32_t i; uint32_t u; } fi; + + fi.f = fast_half_to_float_pos_not_inf_or_nan(x); + + assert(fi.f >= 0.0f); + + fi.f += log_bias; + + return fi.u; + } + + double eval_selectors( + uint32_t num_pixels, + uint8_t* pWeights, + uint32_t ise_weight_range, + const basist::half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const basist::half_float* pDecoded_half, + const astc_hdr_codec_base_options& coptions, + uint32_t usable_selector_bitmask = UINT32_MAX); + + double eval_selectors_dual_plane( + uint32_t channel_index, + uint32_t num_pixels, + uint8_t* pWeights0, uint8_t* pWeights1, + const basist::half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const basist::half_float* pDecoded_half, + const astc_hdr_codec_base_options& coptions, + uint32_t usable_selector_bitmask = UINT32_MAX); + + double compute_block_error(uint32_t num_pixels, const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_base_options& coptions); + + const uint32_t FIRST_MODE7_SUBMODE_INDEX = 0; + const uint32_t MAX_MODE7_SUBMODE_INDEX = 5; + + bool pack_mode7( + const vec3F& high_color_q16, const float s_q16, + uint32_t ise_endpoint_range, uint8_t* pEndpoints, + uint32_t ise_weight_range, // only used for determining biasing during CEM 7 packing + const astc_hdr_codec_base_options& coptions, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used); + + bool try_mode7( + uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + const 
vec3F& high_color_q16, const float s_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, + uint32_t ise_endpoint_range, + int32_t first_submode = 0, int32_t last_submode = MAX_MODE7_SUBMODE_INDEX); + + bool pack_mode11( + const vec3F& low_color_q16, const vec3F& high_color_q16, + uint32_t ise_endpoint_range, uint8_t* pEndpoints, + const astc_hdr_codec_base_options& coptions, + bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used); + + bool try_mode11(uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + const vec3F& low_color_q16, const vec3F& high_color_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping); + + bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used, + const vec3F& low_color_q16, const vec3F& high_color_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping); + + const int FIRST_MODE11_SUBMODE_INDEX = -1; + const int MAX_MODE11_SUBMODE_INDEX = 7; + + enum opt_mode_t + { + cNoOpt, + cOrdinaryLeastSquares, + cWeightedLeastSquares, + cWeightedLeastSquaresHeavy, + cWeightedAverage + }; + + struct encode_astc_block_stats + { + uint32_t m_num_pixels; + vec3F m_mean_q16; + vec3F m_axis_q16; + + void 
init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[]); + }; + + double encode_astc_hdr_block_mode_11( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, uint8_t* blk_weights, + const astc_hdr_codec_base_options& coptions, + bool direct_only, + uint32_t ise_endpoint_range, + bool uber_mode, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, + opt_mode_t opt_mode, + const encode_astc_block_stats *pBlock_stats = nullptr); + + double encode_astc_hdr_block_downsampled_mode_11( + uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y, + uint32_t ise_weight_range, uint32_t ise_endpoint_range, + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + double cur_block_error, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode, + uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode, + const astc_hdr_codec_base_options& coptions, + const encode_astc_block_stats* pBlock_stats = nullptr); + + double encode_astc_hdr_block_mode_11_dual_plane( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t channel_index, // 0-2 + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1, + const astc_hdr_codec_base_options& coptions, + bool direct_only, + uint32_t ise_endpoint_range, + bool uber_mode, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, + bool ignore_clamping); + + double encode_astc_hdr_block_mode_7( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t ise_weight_range, + uint32_t& 
best_submode, + double cur_block_error, + uint8_t* blk_endpoints, //[4] + uint8_t* blk_weights, // [num_pixels] + const astc_hdr_codec_base_options& coptions, + uint32_t ise_endpoint_range, + int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX, + const encode_astc_block_stats *pBlock_stats = nullptr); + + //-------------------------------------------------------------------------------------------------------------------------- + + struct mode11_log_desc + { + int32_t m_submode; + int32_t m_maj_comp; + + // Or R0, G0, B0 if maj_comp==3 (direct) + int32_t m_a; // positive + int32_t m_c; // positive + int32_t m_b0; // positive + + // Or R1, G1, B1 if maj_comp==3 (direct) + int32_t m_b1; // positive + int32_t m_d0; // if not direct, is signed + int32_t m_d1; // if not direct, is signed + + // limits if not direct + int32_t m_a_bits, m_c_bits, m_b_bits, m_d_bits; + int32_t m_max_a_val, m_max_c_val, m_max_b_val, m_min_d_val, m_max_d_val; + + void clear() { clear_obj(*this); } + + bool is_direct() const { return m_maj_comp == 3; } + }; + + //-------------------------------------------------------------------------------------------------------------------------- + bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh); + + bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0); + bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0); + void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16); + + bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints); + void unpack_mode11(const uint8_t* pEndpoints, 
mode11_log_desc& desc); + + void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index); + void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index); + + void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights); + + const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height); + const float* get_8x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height); + + void compute_upsample_matrix(basisu::vector2D& upsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + void compute_upsample_matrix_transposed(basisu::vector& unweighted_downsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + + void compute_diag_AtA_vector(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height, const vector2D& upsample_matrix, float* pDst_vec); + + void downsample_weight_grid( + const float* pMatrix_weights, + uint32_t bx, uint32_t by, // source/from dimension (block size) + uint32_t wx, uint32_t wy, // dest/to dimension (grid size) + const uint8_t* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + uint8_t* pDst_weights); // [wy][wx] + + void downsample_ise_weights( + uint32_t weight_ise_range, uint32_t quant_weight_ise_range, + uint32_t block_w, uint32_t block_h, + uint32_t grid_w, uint32_t grid_h, + const uint8_t* pSrc_weights, uint8_t* pDst_weights); + + void downsample_ise_weights_dual_plane( + uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range, + uint32_t block_w, uint32_t block_h, + uint32_t grid_w, uint32_t grid_h, + const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1, + uint8_t* pDst_weights); + + bool refine_endpoints( + uint32_t cem, + uint32_t endpoint_ise_range, + uint8_t* pEndpoint_vals, // the endpoints to optimize + uint32_t block_w, uint32_t 
block_h, // block dimensions + uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets + astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode); + + extern bool g_astc_hdr_enc_initialized; + + // This MUST be called before encoding any blocks. + void astc_hdr_enc_init(); + +} // namespace basisu + diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_astc_ldr_common.h b/Source/ThirdParty/basis_universal/encoder/basisu_astc_ldr_common.h new file mode 100644 index 000000000..76e7e3f1f --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_astc_ldr_common.h @@ -0,0 +1,445 @@ +// File: basisu_astc_ldr_common.h +#pragma once +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include + +namespace basisu +{ + +namespace astc_ldr +{ + const uint32_t ASTC_LDR_MAX_BLOCK_WIDTH = astc_helpers::MAX_BLOCK_DIM; // 12 + const uint32_t ASTC_LDR_MAX_BLOCK_HEIGHT = astc_helpers::MAX_BLOCK_DIM; // 12 + const uint32_t ASTC_LDR_MAX_BLOCK_PIXELS = astc_helpers::MAX_BLOCK_PIXELS; // 144 + const uint32_t ASTC_LDR_MAX_RAW_WEIGHTS = astc_helpers::MAX_WEIGHT_INTERPOLANT_VALUE + 1; // 65 + + const uint32_t WEIGHT_REFINER_MAX_PASSES = 17; + + inline basist::color_rgba convert_to_basist_color_rgba(const color_rgba& c) + { + return basist::color_rgba(c.r, c.g, c.b, c.a); + } + + struct cem_encode_params + { + uint32_t m_comp_weights[4]; + bool m_decode_mode_srgb; // todo: store astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8 instead, also the alpha mode for srgb because the decoders are broken + + const uint8_t* m_pForced_weight_vals0; + const uint8_t* m_pForced_weight_vals1; + + uint32_t m_max_ls_passes, m_total_weight_refine_passes; + bool m_worst_weight_nudging_flag; + bool m_endpoint_refinement_flag; + + 
cem_encode_params() + { + init(); + } + + void init() + { + m_comp_weights[0] = 1; + m_comp_weights[1] = 1; + m_comp_weights[2] = 1; + m_comp_weights[3] = 1; + + m_decode_mode_srgb = true; + + m_pForced_weight_vals0 = nullptr; + m_pForced_weight_vals1 = nullptr; + + m_max_ls_passes = 3; + m_total_weight_refine_passes = 0; + m_worst_weight_nudging_flag = false; + m_endpoint_refinement_flag = false; + } + + float get_total_comp_weights() const + { + return (float)(m_comp_weights[0] + m_comp_weights[1] + m_comp_weights[2] + m_comp_weights[3]); + } + }; + + struct pixel_stats_t + { + uint32_t m_num_pixels; + + color_rgba m_pixels[ASTC_LDR_MAX_BLOCK_PIXELS]; + vec4F m_pixels_f[ASTC_LDR_MAX_BLOCK_PIXELS]; + + color_rgba m_min, m_max; + + vec4F m_min_f, m_max_f; + vec4F m_mean_f; + + // Always 3D, ignoring alpha + vec3F m_mean_rel_axis3; + vec3F m_zero_rel_axis3; + + // Always 4D + vec4F m_mean_rel_axis4; + + bool m_has_alpha; + + stats m_rgba_stats[4]; + + void clear() + { + clear_obj(*this); + } + + void init(uint32_t num_pixels, const color_rgba* pPixels); + + }; // struct pixel_stats_t + + void global_init(); + + void bit_transfer_signed_enc(int& a, int& b); + void bit_transfer_signed_dec(int& a, int& b); // transfers MSB from a to b, a is then [-32,31] + color_rgba blue_contract_enc(color_rgba orig, bool& did_clamp, int encoded_b); + int quant_preserve2(uint32_t ise_range, uint32_t v); + + uint32_t get_colors(const color_rgba& l, const color_rgba& h, uint32_t weight_ise_index, color_rgba* pColors, bool decode_mode_srgb); + uint32_t get_colors_raw_weights(const color_rgba& l, const color_rgba& h, color_rgba* pColors, bool decode_mode_srgb); + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color_rgba& l, color_rgba& h); // assume BISE 20 + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color_rgba& l, color_rgba& h, float* pScale = nullptr); + uint32_t get_colors(uint32_t 
cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, uint32_t weight_ise_index, color_rgba* pColors, bool decode_mode_srgb); + uint32_t get_colors_raw_weights(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color_rgba* pColors, bool decode_mode_srgb); + + //int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta); + int apply_delta_to_bise_weight_val(uint32_t weight_ise_range, int ise_val, int delta); + + uint64_t eval_solution( + const pixel_stats_t& pixel_stats, + uint32_t total_weights, const color_rgba* pWeight_colors, + uint8_t* pWeight_vals, uint32_t weight_ise_index, + const cem_encode_params& params); + + uint64_t eval_solution( + const pixel_stats_t& pixel_stats, + uint32_t cem_index, + const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, + uint8_t* pWeight_vals, uint32_t weight_ise_index, + const cem_encode_params& params); + + uint64_t eval_solution_dp( + uint32_t ccs_index, + const pixel_stats_t& pixel_stats, + uint32_t total_weights, const color_rgba* pWeight_colors, + uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint32_t weight_ise_index, + const cem_encode_params& params); + + uint64_t eval_solution_dp( + const pixel_stats_t& pixel_stats, + uint32_t cem_index, uint32_t ccs_index, + const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, + uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint32_t weight_ise_index, + const cem_encode_params& params); + + //bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + //bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + //bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + + uint64_t cem_encode_pixels( + uint32_t cem_index, int ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t 
endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint64_t cur_blk_error, + bool use_blue_contraction, bool* pBase_ofs_clamped_flag); + + // TODO: Rename, confusing vs. std::vector or basisu::vector or vec4F etc. + struct partition_pattern_vec + { + uint32_t m_width, m_height; + uint8_t m_parts[ASTC_LDR_MAX_BLOCK_PIXELS]; + + partition_pattern_vec(); + + partition_pattern_vec(const partition_pattern_vec& other); + + partition_pattern_vec(uint32_t width, uint32_t height, const uint8_t* pParts = nullptr); + + void init(uint32_t width, uint32_t height, const uint8_t* pParts = nullptr); + + void init_part_hist(); + + void clear(); + + partition_pattern_vec& operator= (const partition_pattern_vec& rhs); + + uint32_t get_width() const { return m_width; } + uint32_t get_height() const { return m_height; } + uint32_t get_total() const { return m_width * m_height; } + + uint8_t operator[] (uint32_t i) const { assert(i < get_total()); return m_parts[i]; } + uint8_t& operator[] (uint32_t i) { assert(i < get_total()); return m_parts[i]; } + + uint8_t operator() (uint32_t x, uint32_t y) const { assert((x < m_width) && (y < m_height)); return m_parts[x + y * m_width]; } + uint8_t& operator() (uint32_t x, uint32_t y) { assert((x < m_width) && (y < m_height)); return m_parts[x + y * m_width]; } + + int get_squared_distance(const partition_pattern_vec& other) const; + + float get_distance(const partition_pattern_vec& other) const + { + return sqrtf((float)get_squared_distance(other)); + } + + enum { cMaxPermute2Index = 1 }; + partition_pattern_vec get_permuted2(uint32_t permute_index) const; + + enum { cMaxPermute3Index = 5 }; + partition_pattern_vec get_permuted3(uint32_t permute_index) const; + + partition_pattern_vec get_canonicalized() const; + + bool operator== (const partition_pattern_vec& rhs) const + { + if ((m_width != rhs.m_width) || (m_height != rhs.m_height)) + return false; + + return 
memcmp(m_parts, rhs.m_parts, get_total()) == 0; + } + + operator size_t() const + { + return basist::hash_hsieh(m_parts, get_total()); + } + }; + + struct vp_tree_node + { + partition_pattern_vec m_vantage_point; + uint32_t m_point_index; + float m_dist; + + int m_inner_node, m_outer_node; + }; + + const uint32_t NUM_PART3_MAPPINGS = 6; + extern uint8_t g_part3_mapping[NUM_PART3_MAPPINGS][3]; + + class vp_tree + { + public: + vp_tree() + { + } + + void clear() + { + m_nodes.clear(); + } + + // This requires no redundant patterns, i.e. all must be unique. + bool init(uint32_t n, const partition_pattern_vec* pUnique_pats); + + struct result + { + uint32_t m_pat_index; + uint32_t m_mapping_index; + float m_dist; + + bool operator< (const result& rhs) const { return m_dist < rhs.m_dist; } + bool operator> (const result& rhs) const { return m_dist > rhs.m_dist; } + }; + + class result_queue + { + enum { MaxSupportedSize = 512 + 1 }; + + public: + result_queue() : + m_cur_size(0) + { + } + + size_t get_size() const + { + return m_cur_size; + } + + bool empty() const + { + return !m_cur_size; + } + + typedef std::array result_array_type; + + const result_array_type& get_elements() const { return m_elements; } + result_array_type& get_elements() { return m_elements; } + + void clear() + { + m_cur_size = 0; + } + + void reserve(uint32_t n) + { + BASISU_NOTE_UNUSED(n); + } + + const result& top() const + { + assert(m_cur_size); + return m_elements[1]; + } + + bool insert(const result& val, uint32_t max_size) + { + assert(max_size < MaxSupportedSize); + + if (m_cur_size >= MaxSupportedSize) + return false; + + m_elements[++m_cur_size] = val; + up_heap(m_cur_size); + + if (m_cur_size > max_size) + pop(); + + return true; + } + + bool pop() + { + if (m_cur_size == 0) + return false; + + m_elements[1] = m_elements[m_cur_size--]; + down_heap(1); + return true; + } + + float get_highest_dist() const + { + if (!m_cur_size) + return 0.0f; + + return top().m_dist; + } + + private: + 
result_array_type m_elements; + size_t m_cur_size; + + void up_heap(size_t index) + { + while ((index > 1) && (m_elements[index] > m_elements[index >> 1])) + { + std::swap(m_elements[index], m_elements[index >> 1]); + index >>= 1; + } + } + + void down_heap(size_t index) + { + for (; ; ) + { + size_t largest = index, left_child = 2 * index, right_child = 2 * index + 1; + + if ((left_child <= m_cur_size) && (m_elements[left_child] > m_elements[largest])) + largest = left_child; + + if ((right_child <= m_cur_size) && (m_elements[right_child] > m_elements[largest])) + largest = right_child; + + if (largest == index) + break; + + std::swap(m_elements[index], m_elements[largest]); + index = largest; + } + } + }; + + void find_nearest(uint32_t num_subsets, const partition_pattern_vec& desired_pat, result_queue& results, uint32_t max_results) const; + + private: + basisu::vector m_nodes; + + void find_nearest_at_node(int node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) const; + + void find_nearest_at_node_non_recursive(int init_node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) const; + + // returns the index of the new node, or -1 on error + int create_node(uint32_t n, const partition_pattern_vec* pUnique_pats, const uint_vec& pat_indices); + + // returns the pattern index of the vantage point (-1 on error), and the optimal split distance + std::pair find_best_vantage_point(uint32_t num_unique_pats, const partition_pattern_vec* pUnique_pats, const uint_vec& pat_indices); + }; + + typedef basisu::hash_map > partition_hash_map; + + struct partition_pattern_hist + { + uint8_t m_hist[4]; + + partition_pattern_hist() { clear(); } + + void clear() { clear_obj(m_hist); } + }; + + struct partitions_data + { + uint32_t m_width, m_height, m_num_partitions; + partition_pattern_vec 
m_partition_pats[astc_helpers::NUM_PARTITION_PATTERNS]; // indexed by unique index, NOT the 10-bit ASTC seed/pattern index + + partition_pattern_hist m_partition_pat_histograms[astc_helpers::NUM_PARTITION_PATTERNS]; // indexed by unique index, histograms of each pattern + + // ASTC seed to unique index and vice versa + int16_t m_part_seed_to_unique_index[astc_helpers::NUM_PARTITION_PATTERNS]; + int16_t m_unique_index_to_part_seed[astc_helpers::NUM_PARTITION_PATTERNS]; + + // Total number of unique patterns + uint32_t m_total_unique_patterns; + + // VP tree used to rapidly find nearby/similar patterns. + vp_tree m_part_vp_tree; + + void init(uint32_t num_partitions, uint32_t block_width, uint32_t block_height, bool init_vp_tree = true); + }; + + float surrogate_quant_endpoint_val(float e, uint32_t num_endpoint_levels, uint32_t flags); + vec4F surrogate_quant_endpoint(const vec4F& e, uint32_t num_endpoint_levels, uint32_t flags); + + float surrogate_evaluate_rgba_sp(const pixel_stats_t& ps, const vec4F& l, const vec4F& h, float* pWeights0, uint32_t num_weight_levels, const cem_encode_params& enc_params, uint32_t flags); + float surrogate_evaluate_rgba_dp(uint32_t ccs_index, const pixel_stats_t& ps, const vec4F& l, const vec4F& h, float* pWeights0, float* pWeights1, uint32_t num_weight_levels, const cem_encode_params& enc_params, uint32_t flags); + + enum + { + cFlagDisableQuant = 1, + cFlagNoError = 2 + } + ; + float cem_surrogate_encode_pixels( + uint32_t cem_index, int ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F& low_endpoint, vec4F& high_endpoint, float& s, float* pWeights0, float* pWeights1, uint32_t flags = 0); + +#if 0 + bool requantize_ise_endpoints(uint32_t cem, + uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, + uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints); + + uint32_t get_base_cem_without_alpha(uint32_t cem); + + bool 
pack_base_offset( + uint32_t cem_index, uint32_t dst_ise_endpoint_range, uint8_t* pPacked_endpoints, + const color_rgba& l, const color_rgba& h, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag, bool& endpoints_swapped); + + bool convert_endpoints_across_cems( + uint32_t prev_cem, uint32_t prev_endpoint_ise_range, const uint8_t* pPrev_endpoints, + uint32_t dst_cem, uint32_t dst_endpoint_ise_range, uint8_t* pDst_endpoints, + bool always_repack, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag); +#endif + +} // namespace astc_ldr + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_astc_ldr_encode.h b/Source/ThirdParty/basis_universal/encoder/basisu_astc_ldr_encode.h new file mode 100644 index 000000000..c9e0e836d --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_astc_ldr_encode.h @@ -0,0 +1,118 @@ +// File: basisu_astc_ldr_encode.h +#pragma once +#include "basisu_enc.h" +#include "../transcoder/basisu_astc_helpers.h" + +namespace basisu { +namespace astc_ldr { + + void encoder_init(); + + const int EFFORT_LEVEL_MIN = 0, EFFORT_LEVEL_MAX = 10, EFFORT_LEVEL_DEF = 3; + const int DCT_QUALITY_MIN = 1, DCT_QUALITY_MAX = 100; + + struct astc_ldr_encode_config + { + astc_ldr_encode_config() + { + } + + void clear() + { + *this = astc_ldr_encode_config(); + } + + // ASTC LDR block dimensions. Must be a valid ASTC block dimension. Any supported from 4x4-12x12, including unequal dimensions. + uint32_t m_astc_block_width = 6; + uint32_t m_astc_block_height = 6; + + // If true, the encoder assumes all ASTC blocks will be decompressed using sRGB vs. LDR8 mode. This corresponds to astcenc's -cs vs. cl color profiles. + // This should match how the texture is later decoded by the GPU for maximum quality. 
This bit is stored into the output file. + bool m_astc_decode_mode_srgb = true; + + // If true, trade off some compression (3-10%) for faster decompression. + // If false, favor highest compression, but slower decompression. + //bool m_use_faster_format = true; + + basist::astc_ldr_t::xuastc_ldr_syntax m_compressed_syntax = basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith; + + // Encoder CPU effort vs. quality. [0,10], higher=better. + // 0=extremely fast but very brittle (no subsets) + // 1=first 2 subset effort level + // 10=extremely high CPU requirements. + uint32_t m_effort_level = 3; + + // Weight grid DCT quality [1,100] - higher=better quality (JPEG-style). + float m_dct_quality = 85; + + // true=use weight grid DCT, false=always use DPCM + bool m_use_dct = false; + + // true=use lossy supercompression, false=supercompression stage is always lossless. + bool m_lossy_supercompression = false; + + // Channel weights used to compute RGBA colorspace L2 errors. Must be >= 1. + uint32_t m_comp_weights[4] = { 1, 1, 1, 1 }; + + // Lossy supercompression stage parameters for RGB vs. RGBA image inputs. + // (Bounded RDO - explicitly not Lagrangian.) + float m_replacement_min_psnr = 35.0f; // if the block's base PSNR is less than this, it cannot be changed + float m_psnr_trial_diff_thresh = 1.5f; // reject candidates if their PSNR is lower than m_replacement_min_psnr-m_psnr_trial_diff_thresh + float m_psnr_trial_diff_thresh_edge = 1.0f; // edge variant + + // Lossy supercompression settings - alpha texture variants + float m_replacement_min_psnr_alpha = 38.0f; + float m_psnr_trial_diff_thresh_alpha = .75f; + float m_psnr_trial_diff_thresh_edge_alpha = .5f; + + // If true, try encoding blurred blocks, in addition to unblurred, for superpass 1 and 2. + // Higher quality, but massively slower and not yet tuned/refined. + bool m_block_blurring_p1 = false, m_block_blurring_p2 = false; + + // If true, no matter what effort level subset usage will be disabled. 
+ bool m_force_disable_subsets = false; + + // If true, no matter what effort level RGB dual plane usage will be disabled. + bool m_force_disable_rgb_dual_plane = false; + + bool m_debug_images = false; + bool m_debug_output = false; + + std::string m_debug_file_prefix; + + void debug_print() const + { + fmt_debug_printf("ASTC block dimensions: {}x{}\n", m_astc_block_width, m_astc_block_height); + fmt_debug_printf("ASTC decode profile mode sRGB: {}\n", m_astc_decode_mode_srgb); + fmt_debug_printf("Syntax: {}\n", (uint32_t)m_compressed_syntax); + fmt_debug_printf("Effort level: {}\n", m_effort_level); + fmt_debug_printf("Use DCT: {}\n", m_use_dct); + fmt_debug_printf("DCT quality level (1-100): {}\n", m_dct_quality); + fmt_debug_printf("Comp weights: {} {} {} {}\n", m_comp_weights[0], m_comp_weights[1], m_comp_weights[2], m_comp_weights[3]); + fmt_debug_printf("Block blurring: {} {}\n", m_block_blurring_p1, m_block_blurring_p2); + fmt_debug_printf("Force disable subsets: {}\n", m_force_disable_subsets); + fmt_debug_printf("Force disable RGB dual plane: {}\n", m_force_disable_rgb_dual_plane); + + fmt_debug_printf("\nLossy supercompression: {}\n", m_lossy_supercompression); + fmt_debug_printf("m_replacement_min_psnr: {}\n", m_replacement_min_psnr); + fmt_debug_printf("m_psnr_trial_diff_thresh: {}\n", m_psnr_trial_diff_thresh); + fmt_debug_printf("m_psnr_trial_diff_thresh_edge: {}\n", m_psnr_trial_diff_thresh_edge); + fmt_debug_printf("m_replacement_min_psnr_alpha: {}\n", m_replacement_min_psnr_alpha); + fmt_debug_printf("m_psnr_trial_diff_thresh_alpha: {}\n", m_psnr_trial_diff_thresh_alpha); + fmt_debug_printf("m_psnr_trial_diff_thresh_edge_alpha: {}\n", m_psnr_trial_diff_thresh_edge_alpha); + + fmt_debug_printf("m_debug_images: {}\n", m_debug_images); + } + }; + + bool compress_image( + const image& orig_img, uint8_vec &comp_data, vector2D& coded_blocks, + const astc_ldr_encode_config& global_cfg, + job_pool& job_pool); + + void deblock_filter(uint32_t 
filter_block_width, uint32_t filter_block_height, const image& src_img, image& dst_img, bool stronger_filtering = false, int SKIP_THRESH = 24); + +} // namespace astc_ldr +} // namespace basisu + + diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_backend.h b/Source/ThirdParty/basis_universal/encoder/basisu_backend.h new file mode 100644 index 000000000..b33655823 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_backend.h @@ -0,0 +1,408 @@ +// basisu_backend.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "../transcoder/basisu.h" +#include "basisu_enc.h" +#include "../transcoder/basisu_transcoder_internal.h" +#include "basisu_frontend.h" + +namespace basisu +{ + struct etc1_selector_palette_entry + { + etc1_selector_palette_entry() + { + clear(); + } + + void clear() + { + basisu::clear_obj(*this); + } + + uint8_t operator[] (uint32_t i) const { assert(i < 16); return m_selectors[i]; } + uint8_t& operator[] (uint32_t i) { assert(i < 16); return m_selectors[i]; } + + void set_uint32(uint32_t v) + { + for (uint32_t byte_index = 0; byte_index < 4; byte_index++) + { + uint32_t b = (v >> (byte_index * 8)) & 0xFF; + + m_selectors[byte_index * 4 + 0] = b & 3; + m_selectors[byte_index * 4 + 1] = (b >> 2) & 3; + m_selectors[byte_index * 4 + 2] = (b >> 4) & 3; + m_selectors[byte_index * 4 + 3] = (b >> 6) & 3; + } + } + + uint32_t get_uint32() const + { + return get_byte(0) | (get_byte(1) << 8) | (get_byte(2) << 16) | (get_byte(3) << 24); + } + + uint32_t get_byte(uint32_t byte_index) const + { + assert(byte_index < 4); + + return m_selectors[byte_index * 4 + 0] | + (m_selectors[byte_index * 4 + 1] << 2) | + (m_selectors[byte_index * 4 + 2] << 4) | + (m_selectors[byte_index * 4 + 3] << 6); + } + + uint8_t operator()(uint32_t x, uint32_t y) const { assert((x < 4) && (y < 4)); return m_selectors[x + y * 4]; } + uint8_t& operator()(uint32_t x, uint32_t y) { assert((x < 4) && (y < 4)); return m_selectors[x + y * 4]; } + + bool operator< (const etc1_selector_palette_entry& other) const + { + for (uint32_t i = 0; i < 16; i++) + { + if (m_selectors[i] < other.m_selectors[i]) + return true; + else if (m_selectors[i] != other.m_selectors[i]) + return false; + } + + return false; + } + + bool operator== (const etc1_selector_palette_entry& other) const + { + for (uint32_t i = 0; i < 16; i++) + { + if (m_selectors[i] != other.m_selectors[i]) + return false; + } + + return true; + } + + private: + uint8_t m_selectors[16]; + }; + + typedef basisu::vector 
etc1_selector_palette_entry_vec; + + struct encoder_block + { + encoder_block() + { + clear(); + } + + uint32_t m_endpoint_predictor; + + int m_endpoint_index; + int m_selector_index; + + int m_selector_history_buf_index; + + bool m_is_cr_target; + void clear() + { + m_endpoint_predictor = 0; + + m_endpoint_index = 0; + m_selector_index = 0; + + m_selector_history_buf_index = 0; + m_is_cr_target = false; + } + }; + + typedef basisu::vector encoder_block_vec; + typedef vector2D encoder_block_vec2D; + + struct etc1_endpoint_palette_entry + { + etc1_endpoint_palette_entry() + { + clear(); + } + + color_rgba m_color5; + uint32_t m_inten5; + bool m_color5_valid; + + void clear() + { + clear_obj(*this); + } + }; + + typedef basisu::vector etc1_endpoint_palette_entry_vec; + + struct basisu_backend_params + { + bool m_etc1s; + bool m_debug, m_debug_images; + float m_endpoint_rdo_quality_thresh; + float m_selector_rdo_quality_thresh; + uint32_t m_compression_level; + + bool m_used_global_codebooks; + + bool m_validate; + + basisu_backend_params() + { + clear(); + } + + void clear() + { + m_etc1s = false; + m_debug = false; + m_debug_images = false; + m_endpoint_rdo_quality_thresh = 0.0f; + m_selector_rdo_quality_thresh = 0.0f; + m_compression_level = 0; + m_used_global_codebooks = false; + m_validate = true; + } + }; + + struct basisu_backend_slice_desc + { + basisu_backend_slice_desc() + { + clear(); + } + + void clear() + { + clear_obj(*this); + } + + uint32_t m_first_block_index; + + uint32_t m_orig_width; + uint32_t m_orig_height; + + uint32_t m_width; + uint32_t m_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + + uint32_t m_num_macroblocks_x; + uint32_t m_num_macroblocks_y; + + uint32_t m_source_file_index; // also the basis image index + uint32_t m_mip_index; + bool m_alpha; + bool m_iframe; + }; + + typedef basisu::vector basisu_backend_slice_desc_vec; + + struct basisu_backend_output + { + basist::basis_tex_format m_tex_format; + + bool m_etc1s; + 
bool m_uses_global_codebooks; + bool m_srgb; + + uint32_t m_num_endpoints; + uint32_t m_num_selectors; + + uint8_vec m_endpoint_palette; + uint8_vec m_selector_palette; + + basisu_backend_slice_desc_vec m_slice_desc; + + uint8_vec m_slice_image_tables; + basisu::vector m_slice_image_data; + uint16_vec m_slice_image_crcs; + + basisu_backend_output() + { + clear(); + } + + void clear() + { + m_tex_format = basist::basis_tex_format::cETC1S; + m_etc1s = false; + m_uses_global_codebooks = false; + m_srgb = true; + + m_num_endpoints = 0; + m_num_selectors = 0; + + m_endpoint_palette.clear(); + m_selector_palette.clear(); + m_slice_desc.clear(); + m_slice_image_tables.clear(); + m_slice_image_data.clear(); + m_slice_image_crcs.clear(); + } + + uint32_t get_output_size_estimate() const + { + uint32_t total_compressed_bytes = (uint32_t)(m_slice_image_tables.size() + m_endpoint_palette.size() + m_selector_palette.size()); + for (uint32_t i = 0; i < m_slice_image_data.size(); i++) + total_compressed_bytes += (uint32_t)m_slice_image_data[i].size(); + + return total_compressed_bytes; + } + }; + + class basisu_backend + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_backend); + + public: + + basisu_backend(); + + void clear(); + + void init(basisu_frontend *pFront_end, basisu_backend_params ¶ms, const basisu_backend_slice_desc_vec &slice_desc); + + uint32_t encode(); + + const basisu_backend_output &get_output() const { return m_output; } + const basisu_backend_params& get_params() const { return m_params; } + + private: + basisu_frontend *m_pFront_end; + basisu_backend_params m_params; + basisu_backend_slice_desc_vec m_slices; + basisu_backend_output m_output; + + etc1_endpoint_palette_entry_vec m_endpoint_palette; + etc1_selector_palette_entry_vec m_selector_palette; + + struct etc1_global_selector_cb_entry_desc + { + uint32_t m_pal_index; + uint32_t m_mod_index; + bool m_was_used; + }; + + typedef basisu::vector etc1_global_selector_cb_entry_desc_vec; + + 
etc1_global_selector_cb_entry_desc_vec m_global_selector_palette_desc; + + basisu::vector m_slice_encoder_blocks; + + // Maps OLD to NEW endpoint/selector indices + uint_vec m_endpoint_remap_table_old_to_new; + uint_vec m_endpoint_remap_table_new_to_old; + bool_vec m_old_endpoint_was_used; + bool_vec m_new_endpoint_was_used; + + uint_vec m_selector_remap_table_old_to_new; + + // Maps NEW to OLD endpoint/selector indices + uint_vec m_selector_remap_table_new_to_old; + + uint32_t get_total_slices() const + { + return (uint32_t)m_slices.size(); + } + + uint32_t get_total_slice_blocks() const + { + return m_pFront_end->get_total_output_blocks(); + } + + uint32_t get_block_index(uint32_t slice_index, uint32_t block_x, uint32_t block_y) const + { + const basisu_backend_slice_desc &slice = m_slices[slice_index]; + + assert((block_x < slice.m_num_blocks_x) && (block_y < slice.m_num_blocks_y)); + + return slice.m_first_block_index + block_y * slice.m_num_blocks_x + block_x; + } + + uint32_t get_total_blocks(uint32_t slice_index) const + { + return m_slices[slice_index].m_num_blocks_x * m_slices[slice_index].m_num_blocks_y; + } + + uint32_t get_total_blocks() const + { + uint32_t total_blocks = 0; + for (uint32_t i = 0; i < m_slices.size(); i++) + total_blocks += get_total_blocks(i); + return total_blocks; + } + + // Returns the total number of input texels, not counting padding up to blocks/macroblocks. 
+ uint32_t get_total_input_texels(uint32_t slice_index) const + { + return m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height; + } + + uint32_t get_total_input_texels() const + { + uint32_t total_texels = 0; + for (uint32_t i = 0; i < m_slices.size(); i++) + total_texels += get_total_input_texels(i); + return total_texels; + } + + int find_slice(uint32_t block_index, uint32_t *pBlock_x, uint32_t *pBlock_y) const + { + for (uint32_t i = 0; i < m_slices.size(); i++) + { + if ((block_index >= m_slices[i].m_first_block_index) && (block_index < (m_slices[i].m_first_block_index + m_slices[i].m_num_blocks_x * m_slices[i].m_num_blocks_y))) + { + const uint32_t ofs = block_index - m_slices[i].m_first_block_index; + const uint32_t x = ofs % m_slices[i].m_num_blocks_x; + const uint32_t y = ofs / m_slices[i].m_num_blocks_x; + + if (pBlock_x) *pBlock_x = x; + if (pBlock_y) *pBlock_y = y; + + return i; + } + } + return -1; + } + + void create_endpoint_palette(); + + void create_selector_palette(); + + // endpoint palette + // 5:5:5 and predicted 4:4:4 colors, 1 or 2 3-bit intensity table indices + // selector palette + // 4x4 2-bit selectors + + // per-macroblock: + // 4 diff bits + // 4 flip bits + // Endpoint template index, 1-8 endpoint indices + // Alternately, if no template applies, we can send 4 ETC1S bits followed by 4-8 endpoint indices + // 4 selector indices + + void reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec &all_endpoint_indices); + void sort_selector_codebook(); + void create_encoder_blocks(); + void compute_slice_crcs(); + bool encode_image(); + bool encode_endpoint_palette(); + bool encode_selector_palette(); + int find_video_frame(int slice_index, int delta); + void check_for_valid_cr_blocks(); + }; + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_basis_file.h b/Source/ThirdParty/basis_universal/encoder/basisu_basis_file.h new file mode 100644 index 
000000000..57448bccb --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_basis_file.h @@ -0,0 +1,70 @@ +// basisu_basis_file.h +// Copyright (C) 2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "../transcoder/basisu_file_headers.h" +#include "basisu_backend.h" + +namespace basisu +{ + class basisu_file + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_file); + + public: + basisu_file() + { + } + + void clear() + { + m_comp_data.clear(); + + clear_obj(m_header); + m_images_descs.clear(); + + m_header_file_ofs = 0; + m_slice_descs_file_ofs = 0; + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = 0; + m_first_image_file_ofs = 0; + m_total_file_size = 0; + } + + bool init(const basisu_backend_output& encoder_output, basist::basis_texture_type tex_type, uint32_t userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame); + + const uint8_vec &get_compressed_data() const { return m_comp_data; } + + private: + basist::basis_file_header m_header; + basisu::vector m_images_descs; + + uint8_vec m_comp_data; + + uint32_t m_header_file_ofs; + uint32_t m_slice_descs_file_ofs; + uint32_t m_endpoint_cb_file_ofs; + uint32_t m_selector_cb_file_ofs; + uint32_t m_tables_file_ofs; + uint32_t m_first_image_file_ofs; + uint32_t m_total_file_size; + + void create_header(const basisu_backend_output& encoder_output, basist::basis_texture_type tex_type, uint32_t 
userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame); + bool create_image_descs(const basisu_backend_output& encoder_output); + void create_comp_data(const basisu_backend_output& encoder_output); + void fixup_crcs(); + }; + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_bc7enc.h b/Source/ThirdParty/basis_universal/encoder/basisu_bc7enc.h new file mode 100644 index 000000000..af147c594 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_bc7enc.h @@ -0,0 +1,132 @@ +// File: basisu_bc7enc.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_enc.h" +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + +#define BC7ENC_MAX_PARTITIONS1 (64) +#define BC7ENC_MAX_UBER_LEVEL (4) + + typedef uint8_t bc7enc_bool; + +#define BC7ENC_TRUE (1) +#define BC7ENC_FALSE (0) + + typedef struct { float m_c[4]; } bc7enc_vec4F; + + extern const float g_bc7_weights1x[2 * 4]; + extern const float g_bc7_weights2x[4 * 4]; + extern const float g_bc7_weights3x[8 * 4]; + extern const float g_bc7_weights4x[16 * 4]; + extern const float g_astc_weights4x[16 * 4]; + extern const float g_astc_weights5x[32 * 4]; + extern const float g_astc_weights_3levelsx[3 * 4]; + + extern basist::astc_quant_bin g_astc_sorted_order_unquant[basist::BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] + + struct color_cell_compressor_params + { + uint32_t m_num_pixels; + const basist::color_quad_u8* m_pPixels; + + uint32_t m_num_selector_weights; + const uint32_t* m_pSelector_weights; + + const bc7enc_vec4F* m_pSelector_weightsx; + uint32_t m_comp_bits; + + const uint8_t *m_pForce_selectors; + + // Non-zero m_astc_endpoint_range enables ASTC mode. m_comp_bits and m_has_pbits are always false. We only support 2, 3, or 4 bit weight encodings. + uint32_t m_astc_endpoint_range; + + uint32_t m_weights[4]; + bc7enc_bool m_has_alpha; + bc7enc_bool m_has_pbits; + bc7enc_bool m_endpoints_share_pbit; + bc7enc_bool m_perceptual; + }; + + struct color_cell_compressor_results + { + uint64_t m_best_overall_err; + basist::color_quad_u8 m_low_endpoint; + basist::color_quad_u8 m_high_endpoint; + uint32_t m_pbits[2]; + uint8_t* m_pSelectors; + uint8_t* m_pSelectors_temp; + + // Encoded ASTC indices, if ASTC mode is enabled + basist::color_quad_u8 m_astc_low_endpoint; + basist::color_quad_u8 m_astc_high_endpoint; + }; + + struct bc7enc_compress_block_params + { + // m_max_partitions_mode1 may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. 
The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_max_partitions_mode1; + + // Relative RGBA or YCbCrA weights. + uint32_t m_weights[4]; + + // m_uber_level may range from 0 to BC7ENC_MAX_UBER_LEVEL. The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_uber_level; + + // If m_perceptual is true, colorspace error is computed in YCbCr space, otherwise RGB. + bc7enc_bool m_perceptual; + + uint32_t m_least_squares_passes; + }; + + uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, const bc7enc_compress_block_params* pComp_params); + + uint64_t color_cell_compression_est_astc( + uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeight_table, + uint32_t num_pixels, const basist::color_quad_u8* pPixels, + uint64_t best_err_so_far, const uint32_t weights[4]); + + inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_FALSE; + p->m_weights[0] = 1; + p->m_weights[1] = 1; + p->m_weights[2] = 1; + p->m_weights[3] = 1; + } + + inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_TRUE; + p->m_weights[0] = 128; + p->m_weights[1] = 64; + p->m_weights[2] = 16; + p->m_weights[3] = 32; + } + + inline void bc7enc_compress_block_params_init(bc7enc_compress_block_params* p) + { + p->m_max_partitions_mode1 = BC7ENC_MAX_PARTITIONS1; + p->m_least_squares_passes = 1; + p->m_uber_level = 0; + bc7enc_compress_block_params_init_perceptual_weights(p); + } + + // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). 
+ void bc7enc_compress_block_init(); + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_comp.h b/Source/ThirdParty/basis_universal/encoder/basisu_comp.h new file mode 100644 index 000000000..92df9fd87 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_comp.h @@ -0,0 +1,1090 @@ +// basisu_comp.h +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_frontend.h" +#include "basisu_backend.h" +#include "basisu_basis_file.h" +#include "../transcoder/basisu_transcoder.h" +#include "basisu_uastc_enc.h" +#include "basisu_uastc_hdr_4x4_enc.h" +#include "basisu_astc_hdr_6x6_enc.h" +#include "basisu_astc_ldr_encode.h" + +#define BASISU_LIB_VERSION 200 +#define BASISU_LIB_VERSION_STRING "2.00" + +#ifndef BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 is undefined +#endif +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #error BASISD_SUPPORT_KTX2_ZSTD is undefined +#endif + +#if !BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 must be enabled when building the encoder. To reduce code size if KTX2 support is not needed, set BASISD_SUPPORT_KTX2_ZSTD to 0 +#endif + +namespace basisu +{ + struct opencl_context; + typedef opencl_context* opencl_context_ptr; + + const uint32_t BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION = 16384; + + // Allow block's color distance to increase by 1.5 while searching for an alternative nearby endpoint. 
+ const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; + + // Allow block's color distance to increase by 1.25 while searching the selector history buffer for a close enough match. + const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; + + const int BASISU_DEFAULT_QUALITY = 128; + const float BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH = 2.0f; + + const uint32_t BASISU_MAX_IMAGE_DIMENSION = 16384; + const uint32_t BASISU_QUALITY_MIN = 1; + const uint32_t BASISU_QUALITY_MAX = 255; + const uint32_t BASISU_XUASTC_QUALITY_MIN = 1; + const uint32_t BASISU_XUASTC_QUALITY_MAX = 100; + const uint32_t BASISU_MAX_ENDPOINT_CLUSTERS = basisu_frontend::cMaxEndpointClusters; + const uint32_t BASISU_MAX_SELECTOR_CLUSTERS = basisu_frontend::cMaxSelectorClusters; + + const uint32_t BASISU_MAX_SLICES = 0xFFFFFF; + + const int BASISU_RDO_UASTC_DICT_SIZE_DEFAULT = 4096; // 32768; + const int BASISU_RDO_UASTC_DICT_SIZE_MIN = 64; + const int BASISU_RDO_UASTC_DICT_SIZE_MAX = 65536; + + struct image_stats + { + image_stats() + { + clear(); + } + + void clear() + { + m_filename.clear(); + m_width = 0; + m_height = 0; + + m_basis_rgb_avg_psnr = 0.0f; + m_basis_rgb_avg_log2_psnr = 0.0f; + + m_basis_rgba_avg_psnr = 0.0f; + m_basis_a_avg_psnr = 0.0f; + m_basis_luma_709_psnr = 0.0f; + m_basis_luma_601_psnr = 0.0f; + m_basis_luma_709_ssim = 0.0f; + + m_basis_rgb_avg_bc6h_psnr = 0.0f; + m_basis_rgb_avg_bc6h_log2_psnr = 0.0f; + + m_bc7_rgb_avg_psnr = 0.0f; + m_bc7_rgba_avg_psnr = 0.0f; + m_bc7_a_avg_psnr = 0.0f; + m_bc7_luma_709_psnr = 0.0f; + m_bc7_luma_601_psnr = 0.0f; + m_bc7_luma_709_ssim = 0.0f; + + m_best_etc1s_rgb_avg_psnr = 0.0f; + m_best_etc1s_luma_709_psnr = 0.0f; + m_best_etc1s_luma_601_psnr = 0.0f; + m_best_etc1s_luma_709_ssim = 0.0f; + + m_opencl_failed = false; + } + + std::string m_filename; + uint32_t m_width; + uint32_t m_height; + + // .basis/.ktx2 compressed (LDR: ETC1S or UASTC statistics, HDR: transcoded BC6H statistics) + float m_basis_rgb_avg_psnr; + float 
m_basis_rgb_avg_log2_psnr; + + float m_basis_rgba_avg_psnr; + float m_basis_a_avg_psnr; + float m_basis_luma_709_psnr; + float m_basis_luma_601_psnr; + float m_basis_luma_709_ssim; + + // UASTC HDR only. + float m_basis_rgb_avg_bc6h_psnr; + float m_basis_rgb_avg_bc6h_log2_psnr; + + // LDR: BC7 statistics + float m_bc7_rgb_avg_psnr; + float m_bc7_rgba_avg_psnr; + float m_bc7_a_avg_psnr; + float m_bc7_luma_709_psnr; + float m_bc7_luma_601_psnr; + float m_bc7_luma_709_ssim; + + // LDR: Highest achievable quality ETC1S statistics, for development/comparison + float m_best_etc1s_rgb_avg_psnr; + float m_best_etc1s_luma_709_psnr; + float m_best_etc1s_luma_601_psnr; + float m_best_etc1s_luma_709_ssim; + + bool m_opencl_failed; + }; + + enum class hdr_modes + { + // standard but constrained ASTC HDR 4x4 tex data that can be rapidly transcoded to BC6H + cUASTC_HDR_4X4, + // standard RDO optimized or non-RDO (highest quality) ASTC HDR 6x6 tex data that can be rapidly re-encoded to BC6H + cASTC_HDR_6X6, + // a custom intermediate format based off ASTC HDR that can be rapidly decoded straight to ASTC HDR or re-encoded to BC6H + cUASTC_HDR_6X6_INTERMEDIATE, + cTotal + }; + + template + struct bool_param + { + bool_param() : + m_value(def), + m_changed(false) + { + } + + void clear() + { + m_value = def; + m_changed = false; + } + + operator bool() const + { + return m_value; + } + + bool operator= (bool v) + { + m_value = v; + m_changed = true; + return m_value; + } + + bool was_changed() const { return m_changed; } + void set_changed(bool flag) { m_changed = flag; } + + bool m_value; + bool m_changed; + }; + + template + struct param + { + param(T def, T min_v, T max_v) : + m_value(def), + m_def(def), + m_min(min_v), + m_max(max_v), + m_changed(false) + { + } + + void clear() + { + m_value = m_def; + m_changed = false; + } + + operator T() const + { + return m_value; + } + + T operator= (T v) + { + m_value = clamp(v, m_min, m_max); + m_changed = true; + return m_value; + } + + 
T operator *= (T v) + { + m_value *= v; + m_changed = true; + return m_value; + } + + bool was_changed() const { return m_changed; } + void set_changed(bool flag) { m_changed = flag; } + + T m_value; + T m_def; + T m_min; + T m_max; + bool m_changed; + }; + + // Low-level direct compressor parameters. + // Also see basis_compress() below for a simplified C-style interface. + struct basis_compressor_params + { + basis_compressor_params() : + m_xuastc_or_astc_ldr_basis_tex_format(-1, -1, INT_MAX), + // Note the ETC1S default compression/effort level is 2, not the command line default of 1. + m_etc1s_compression_level((int)BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_ETC1S_COMPRESSION_LEVEL), + m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), + m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f), + m_mip_scale(1.0f, .000125f, 4.0f), + m_mip_smallest_dimension(1, 1, 16384), + m_etc1s_max_endpoint_clusters(512), + m_etc1s_max_selector_clusters(512), + m_quality_level(-1), + m_pack_uastc_ldr_4x4_flags(cPackUASTCLevelDefault), + m_rdo_uastc_ldr_4x4_quality_scalar(1.0f, 0.001f, 50.0f), + m_rdo_uastc_ldr_4x4_dict_size(BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX), + m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale(UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE, 1.0f, 300.0f), + m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev(UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV, .01f, 65536.0f), + m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio(UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO, .01f, 100.0f), + m_rdo_uastc_ldr_4x4_skip_block_rms_thresh(UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH, .01f, 100.0f), + m_resample_width(0, 1, 16384), + m_resample_height(0, 1, 16384), + m_resample_factor(0.0f, .00125f, 100.0f), + m_ktx2_uastc_supercompression(basist::KTX2_SS_NONE), + m_ktx2_zstd_supercompression_level(6, INT_MIN, INT_MAX), + m_transcode_flags(0, 0, UINT32_MAX), + 
m_ldr_hdr_upconversion_nit_multiplier(0.0f, 0.0f, basist::MAX_HALF_FLOAT), + m_ldr_hdr_upconversion_black_bias(0.0f, 0.0f, 1.0f), + m_xuastc_ldr_effort_level(astc_ldr::EFFORT_LEVEL_DEF, astc_ldr::EFFORT_LEVEL_MIN, astc_ldr::EFFORT_LEVEL_MAX), + m_xuastc_ldr_syntax((int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd, (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith, (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd), + m_ls_min_psnr(35.0f, 0.0f, 100.0f), m_ls_min_alpha_psnr(38.0f, 0.0f, 100.0f), + m_ls_thresh_psnr(1.5f, 0.0f, 100.0f), m_ls_thresh_alpha_psnr(0.75f, 0.0f, 100.0f), + m_ls_thresh_edge_psnr(1.0f, 0.0f, 100.00f), m_ls_thresh_edge_alpha_psnr(0.5f, 0.0f, 100.00f), + m_pJob_pool(nullptr) + { + clear(); + } + + void clear() + { + m_format_mode = basist::basis_tex_format::cETC1S; + + m_uastc.clear(); + m_hdr.clear(); + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; + m_xuastc_or_astc_ldr_basis_tex_format = -1; + + m_use_opencl.clear(); + m_status_output.clear(); + + m_source_filenames.clear(); + m_source_alpha_filenames.clear(); + + m_source_images.clear(); + m_source_mipmap_images.clear(); + + m_out_filename.clear(); + + m_y_flip.clear(); + m_debug.clear(); + m_validate_etc1s.clear(); + m_debug_images.clear(); + m_perceptual.clear(); + m_no_selector_rdo.clear(); + m_selector_rdo_thresh.clear(); + m_read_source_images.clear(); + m_write_output_basis_or_ktx2_files.clear(); + m_etc1s_compression_level.clear(); + m_compute_stats.clear(); + m_print_stats.clear(); + m_check_for_alpha.clear(); + m_force_alpha.clear(); + m_multithreading.clear(); + m_swizzle[0] = 0; + m_swizzle[1] = 1; + m_swizzle[2] = 2; + m_swizzle[3] = 3; + m_renormalize.clear(); + m_disable_hierarchical_endpoint_codebooks.clear(); + + m_no_endpoint_rdo.clear(); + m_endpoint_rdo_thresh.clear(); + + m_mip_gen.clear(); + m_mip_scale.clear(); + m_mip_filter = "kaiser"; + m_mip_scale = 1.0f; + m_mip_srgb.clear(); + m_mip_premultiplied.clear(); + m_mip_renormalize.clear(); + 
m_mip_wrapping.clear(); + m_mip_fast.clear(); + m_mip_smallest_dimension.clear(); + + m_etc1s_max_endpoint_clusters = 0; + m_etc1s_max_selector_clusters = 0; + m_quality_level = -1; + + m_tex_type = basist::cBASISTexType2D; + m_userdata0 = 0; + m_userdata1 = 0; + m_us_per_frame = 0; + + m_pack_uastc_ldr_4x4_flags = cPackUASTCLevelDefault; + m_rdo_uastc_ldr_4x4.clear(); + m_rdo_uastc_ldr_4x4_quality_scalar.clear(); + m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale.clear(); + m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev.clear(); + m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio.clear(); + m_rdo_uastc_ldr_4x4_skip_block_rms_thresh.clear(); + m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode.clear(); + m_rdo_uastc_ldr_4x4_multithreading.clear(); + + m_resample_width.clear(); + m_resample_height.clear(); + m_resample_factor.clear(); + + m_pGlobal_codebooks = nullptr; + + m_create_ktx2_file.clear(); + m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; + m_ktx2_key_values.clear(); + m_ktx2_zstd_supercompression_level.clear(); + m_ktx2_and_basis_srgb_transfer_function.clear(); + + m_validate_output_data.clear(); + m_transcode_flags.clear(); + + m_ldr_hdr_upconversion_srgb_to_linear.clear(); + + m_hdr_favor_astc.clear(); + + m_uastc_hdr_4x4_options.init(); + m_astc_hdr_6x6_options.clear(); + + m_ldr_hdr_upconversion_nit_multiplier.clear(); + m_ldr_hdr_upconversion_black_bias.clear(); + + m_xuastc_ldr_effort_level.clear(); + m_xuastc_ldr_use_dct.clear(); + m_xuastc_ldr_use_lossy_supercompression.clear(); + m_xuastc_ldr_force_disable_subsets.clear(); + m_xuastc_ldr_force_disable_rgb_dual_plane.clear(); + m_xuastc_ldr_syntax.clear(); + + m_ls_min_psnr.clear(); + m_ls_min_alpha_psnr.clear(); + m_ls_thresh_psnr.clear(); + m_ls_thresh_alpha_psnr.clear(); + m_ls_thresh_edge_psnr.clear(); + m_ls_thresh_edge_alpha_psnr.clear(); + for (uint32_t i = 0; i < 4; i++) + m_xuastc_ldr_channel_weights[i] = 1; + m_xuastc_ldr_blurring.clear(); + + m_pJob_pool = nullptr; + } + + // 
Configures the compressor's mode by setting the proper parameters (which were preserved for backwards compatibility with old code). + // This is by far the preferred way of controlling which codec mode the compressor will select. + void set_format_mode(basist::basis_tex_format mode) + { + m_format_mode = mode; + + switch (mode) + { + case basist::basis_tex_format::cETC1S: + { + // ETC1S + m_xuastc_or_astc_ldr_basis_tex_format = -1; + m_hdr = false; + m_uastc = false; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter + break; + } + case basist::basis_tex_format::cUASTC_LDR_4x4: + { + // UASTC LDR 4x4 + m_xuastc_or_astc_ldr_basis_tex_format = -1; + m_hdr = false; + m_uastc = true; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter + break; + } + case basist::basis_tex_format::cUASTC_HDR_4x4: + { + // UASTC HDR 4x4 + m_xuastc_or_astc_ldr_basis_tex_format = -1; + m_hdr = true; + m_uastc = true; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; + break; + } + case basist::basis_tex_format::cASTC_HDR_6x6: + { + // ASTC HDR 6x6 + m_xuastc_or_astc_ldr_basis_tex_format = -1; + m_hdr = true; + m_uastc = true; + m_hdr_mode = hdr_modes::cASTC_HDR_6X6; + break; + } + case basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: + { + // UASTC HDR 6x6 + m_xuastc_or_astc_ldr_basis_tex_format = -1; + m_hdr = true; + m_uastc = true; + m_hdr_mode = hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE; + break; + } + case basist::basis_tex_format::cXUASTC_LDR_4x4: + case basist::basis_tex_format::cXUASTC_LDR_5x4: + case basist::basis_tex_format::cXUASTC_LDR_5x5: + case basist::basis_tex_format::cXUASTC_LDR_6x5: + case basist::basis_tex_format::cXUASTC_LDR_6x6: + case basist::basis_tex_format::cXUASTC_LDR_8x5: + case basist::basis_tex_format::cXUASTC_LDR_8x6: + case basist::basis_tex_format::cXUASTC_LDR_10x5: + case basist::basis_tex_format::cXUASTC_LDR_10x6: + case basist::basis_tex_format::cXUASTC_LDR_8x8: + case basist::basis_tex_format::cXUASTC_LDR_10x8: + case 
basist::basis_tex_format::cXUASTC_LDR_10x10: + case basist::basis_tex_format::cXUASTC_LDR_12x10: + case basist::basis_tex_format::cXUASTC_LDR_12x12: + case basist::basis_tex_format::cASTC_LDR_4x4: + case basist::basis_tex_format::cASTC_LDR_5x4: + case basist::basis_tex_format::cASTC_LDR_5x5: + case basist::basis_tex_format::cASTC_LDR_6x5: + case basist::basis_tex_format::cASTC_LDR_6x6: + case basist::basis_tex_format::cASTC_LDR_8x5: + case basist::basis_tex_format::cASTC_LDR_8x6: + case basist::basis_tex_format::cASTC_LDR_10x5: + case basist::basis_tex_format::cASTC_LDR_10x6: + case basist::basis_tex_format::cASTC_LDR_8x8: + case basist::basis_tex_format::cASTC_LDR_10x8: + case basist::basis_tex_format::cASTC_LDR_10x10: + case basist::basis_tex_format::cASTC_LDR_12x10: + case basist::basis_tex_format::cASTC_LDR_12x12: + { + // ASTC LDR 4x4-12x12 or XUASTC LDR 4x4-12x12 + m_xuastc_or_astc_ldr_basis_tex_format = (int)mode; + m_hdr = false; + m_uastc = true; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter + break; + } + default: + assert(0); + break; + } + } + + // Like set_format_mode() but also sets the effort and quality parameters appropriately for the selected mode. + // "Effort" (perf. vs. highest achievable quality) and "quality" (quality vs. bitrate) parameters are now mode dependent. + // Effort ranges from [0,10] and quality ranges from [1,100], unless they are -1 in which case you get the codec's default settings. + bool set_format_mode_and_effort(basist::basis_tex_format mode, int effort = -1, bool set_defaults = true); + bool set_format_mode_and_quality_effort(basist::basis_tex_format mode, int quality = -1, int effort = -1, bool set_defaults = true); + + // Sets all the sRGB-related options (m_perceptual, m_mip_srgb, m_ktx2_and_basis_srgb_transfer_function) to the specified value. 
+ void set_srgb_options(bool srgb_flag) + { + m_perceptual = srgb_flag; + m_mip_srgb = srgb_flag; + m_ktx2_and_basis_srgb_transfer_function = srgb_flag; + } + + // Simpler helpers - I wish this was easier, but backwards API compat is also valuable. + bool is_etc1s() const + { + return !m_uastc; + } + + bool is_uastc_ldr_4x4() const + { + return m_uastc && !m_hdr && (m_xuastc_or_astc_ldr_basis_tex_format == -1); + } + + bool is_uastc_hdr_4x4() const + { + return m_uastc && m_hdr && (m_hdr_mode == hdr_modes::cUASTC_HDR_4X4); + } + + // By default we generate LDR ETC1S data. + // Ideally call set_format_mode() above instead of directly manipulating the below fields. These individual parameters are for backwards API compatibility. + // - If m_uastc is false you get ETC1S (the default). + // - If m_uastc is true, and m_hdr is not true, and m_xuastc_or_astc_ldr_basis_tex_format==-1, we generate UASTC 4x4 LDR data (8bpp with or without RDO). + // - If m_uastc is true, and m_hdr is not true, and m_xuastc_or_astc_ldr_basis_tex_format!=-1, we generate XUASTC 4x4-12x12 or ASTC 4x4-12x12 LDR data, controlled by m_xuastc_or_astc_ldr_basis_tex_format. + // - If m_uastc is true and m_hdr is true, we generate 4x4 or 6x6 HDR data, controlled by m_hdr_mode. + + // True to generate UASTC .basis/.KTX2 file data, otherwise ETC1S. + // Should be true for any non-ETC1S format (UASTC 4x4 LDR, UASTC 4x4 HDR, RDO ASTC 6x6 HDR, UASTC 6x6 HDR, or ASTC/XUASTC LDR 4x4-12x12). + // Note: Ideally call set_format_mode() or set_format_mode_and_quality_effort() above instead. + // Many of these individual parameters are for backwards API compatibility. + bool_param m_uastc; + + // Set m_hdr to true to switch to UASTC HDR mode. m_hdr_mode then controls which format is output. + // m_hdr_mode then controls which format is output (4x4, 6x6, or 6x6 intermediate). + // Note: Ideally call set_format_mode() instead. This is for backwards API compatibility. 
+ bool_param m_hdr; + + // If m_hdr is true, this specifies which mode we operate in (currently UASTC 4x4 HDR or ASTC 6x6 HDR). Defaults to UASTC 4x4 HDR for backwards compatibility. + // Note: Ideally call set_format_mode() instead. This is for backwards API compatibility. + hdr_modes m_hdr_mode; + + // If not -1: Generate XUASTC or ASTC LDR 4x4-12x12 files in the specified basis_tex_format (which also sets the ASTC block size). If -1 (the default), don't generate XUASTC/ASTC LDR files. + // m_uastc must also be set to true if this is not -1. + // Note: Ideally call set_format_mode() instead. + param m_xuastc_or_astc_ldr_basis_tex_format; // enum basis_tex_format + + // True to enable OpenCL if it's available. The compressor will fall back to CPU encoding if something goes wrong. + bool_param m_use_opencl; + + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read. + // Otherwise, the compressor processes the images in m_source_images or m_source_images_hdr. + basisu::vector m_source_filenames; + basisu::vector m_source_alpha_filenames; + + // An array of 2D LDR/SDR source images. + basisu::vector m_source_images; + + // An array of 2D HDR source images. + basisu::vector m_source_images_hdr; + + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. + // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. + // The compressor applies the user-provided swizzling (in m_swizzle) to these images. + basisu::vector< basisu::vector > m_source_mipmap_images; + + basisu::vector< basisu::vector > m_source_mipmap_images_hdr; + + // Filename of the output basis/ktx2 file + std::string m_out_filename; + + // The params are done this way so we can detect when the user has explictly changed them. 
+ + // Flip images across Y axis + bool_param m_y_flip; + + // If true, the compressor will print basis status to stdout during compression. + bool_param m_status_output; + + // Output debug information during compression + bool_param m_debug; + + // Low-level ETC1S data validation during encoding (slower/development). + bool_param m_validate_etc1s; + + // m_debug_images is pretty slow + bool_param m_debug_images; + + // ETC1S compression effort level, from 0 to BASISU_MAX_ETC1S_COMPRESSION_LEVEL (higher is slower). + // This parameter controls numerous internal encoding speed vs. compression efficiency/performance tradeoffs. + // Note this is NOT the same as the ETC1S quality level, and most users shouldn't change this. + param m_etc1s_compression_level; + + // Use perceptual sRGB colorspace metrics instead of linear. + // Note: You probably also want to set m_ktx2_srgb_transfer_func to match. + // Note: This member variable was previously called "m_perceptual". + bool_param m_perceptual; + + // Disable selector RDO, for faster compression but larger files + bool_param m_no_selector_rdo; + param m_selector_rdo_thresh; + + bool_param m_no_endpoint_rdo; + param m_endpoint_rdo_thresh; + + // Read source images from m_source_filenames/m_source_alpha_filenames + bool_param m_read_source_images; + + // Write the output basis/ktx2 file to disk using m_out_filename + bool_param m_write_output_basis_or_ktx2_files; + + // Compute and display image metrics + bool_param m_compute_stats; + + // Print stats to stdout, if m_compute_stats is true. + bool_param m_print_stats; + + // Check to see if any input image has an alpha channel, if so then the output basis/ktx2 file will have alpha channels + bool_param m_check_for_alpha; + + // Always put alpha slices in the output basis/ktx2 file, even when the input doesn't have alpha + bool_param m_force_alpha; + + // True to enable multithreading in various compressors. 
+ // Note currently, some compressors (like ASTC/XUASTC LDR) will utilize threading anyway if the job pool is more than one thread. + bool_param m_multithreading; + + // Split the R channel to RGB and the G channel to alpha, then write a basis/ktx2 file with alpha channels + uint8_t m_swizzle[4]; + + // Renormalize normal map normals after loading image + bool_param m_renormalize; + + // If true the front end will not use 2 level endpoint codebook searching, for slightly higher quality but much slower execution. + // Note some m_etc1s_compression_level's disable this automatically. + bool_param m_disable_hierarchical_endpoint_codebooks; + + // mipmap generation parameters + bool_param m_mip_gen; + param m_mip_scale; + std::string m_mip_filter; + bool_param m_mip_srgb; + bool_param m_mip_premultiplied; // not currently supported + bool_param m_mip_renormalize; + bool_param m_mip_wrapping; + bool_param m_mip_fast; + param m_mip_smallest_dimension; + + // ETC1S codebook size (quality) control. + // If m_etc1s_quality_level != -1, it controls the quality level. It ranges from [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. + // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. + uint32_t m_etc1s_max_endpoint_clusters; + uint32_t m_etc1s_max_selector_clusters; + + // Quality level (bitrate vs. distortion tradeoff) control for ETC1S or XUASTC LDR 4x4-12x12 (must not be -1 for DCT to be used in XUASTC LDR 4x4 mode) + int m_quality_level; + + // m_tex_type, m_userdata0, m_userdata1, m_framerate - These fields go directly into the .basis file header. + basist::basis_texture_type m_tex_type; + uint32_t m_userdata0; + uint32_t m_userdata1; + uint32_t m_us_per_frame; + + // UASTC LDR 4x4 parameters + // cPackUASTCLevelDefault, etc. 
+ uint32_t m_pack_uastc_ldr_4x4_flags; + bool_param m_rdo_uastc_ldr_4x4; + param m_rdo_uastc_ldr_4x4_quality_scalar; // RDO lambda for UASTC 4x4 LDR + param m_rdo_uastc_ldr_4x4_dict_size; + param m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale; + param m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev; + param m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio; + param m_rdo_uastc_ldr_4x4_skip_block_rms_thresh; + bool_param m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode; + bool_param m_rdo_uastc_ldr_4x4_multithreading; + + // Resample input texture after loading + param m_resample_width; + param m_resample_height; + param m_resample_factor; + + // ETC1S global codebook control + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; + + // KTX2 specific parameters. + // Internally, the compressor always creates a .basis file then it converts that lossless to KTX2. + bool_param m_create_ktx2_file; + basist::ktx2_supercompression m_ktx2_uastc_supercompression; + basist::ktx2_transcoder::key_value_vec m_ktx2_key_values; + param m_ktx2_zstd_supercompression_level; + + // Note: The default for this parameter (which used to be "m_ktx2_srgb_transfer_func") used to be false, now setting this to true and renaming to m_ktx2_and_basis_srgb_transfer_function. + // Also see m_perceptual and m_mip_srgb, which should in most uses be the same. + // This also controls the XUASTC LDR ASTC decode profile (linear vs. sRGB) in the simulated decoder block. + // For XUASTC LDR, it's also still used when generating .basis files vs. .KTX2. + bool_param m_ktx2_and_basis_srgb_transfer_function; // false = linear transfer function, true = sRGB transfer function + + uastc_hdr_4x4_codec_options m_uastc_hdr_4x4_options; + astc_6x6_hdr::astc_hdr_6x6_global_config m_astc_hdr_6x6_options; + + // True to try transcoding the generated output after compression to a few formats. 
+ bool_param m_validate_output_data; + + // The flags to use while transcoding if m_validate_output_data + param m_transcode_flags; + + // LDR->HDR upconversion parameters. + // + // If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion), or absolute luminance (nits or candelas per meter squared), and then processed as HDR. + // Otherwise, LDR images are assumed to already be in linear light (i.e. they don't use the sRGB transfer function). + bool_param m_ldr_hdr_upconversion_srgb_to_linear; + + // m_ldr_hdr_upconversion_nit_multiplier is only used when loading SDR/LDR images and compressing to an HDR output format. + // By default m_ldr_hdr_upconversion_nit_multiplier is 0. It's an override for the default. + // When loading LDR images, a default multiplier of 1.0 will be used in UASTC 4x4 HDR mode. Partially for backwards compatibility with previous library releases, and also because it doesn't really matter with this encoder what the multiplier is. + // With the 6x6 HDR encoder it does matter because it expects inputs in absolute nits, so the LDR upconversion luminance multiplier default will be 100 nits. (Most SDR monitors were/are 80-100 nits or so.) + param m_ldr_hdr_upconversion_nit_multiplier; + + // The optional sRGB space bias to use during LDR->HDR upconversion. Should be between [0,.49] or so. Only applied on black (0.0) color components. + // Defaults to no bias (0.0f). + param m_ldr_hdr_upconversion_black_bias; + + // If true, ASTC HDR quality is favored more than BC6H quality by the dual target encoder. Otherwise it's a rough balance. 
+ // UASTC HDR 4x4 + bool_param m_hdr_favor_astc; + + // XUASTC LDR 4x4-12x12 specific options + param m_xuastc_ldr_effort_level; + bool_param m_xuastc_ldr_use_dct; // set the DCT quality above using m_quality_level, [1,100] + bool_param m_xuastc_ldr_use_lossy_supercompression; // allows the compressor to introduce a bounded amount of distortion if doing so would make smaller files (actually ASTC or XUASTC) + bool_param m_xuastc_ldr_force_disable_subsets; // disable 2-3 subset usage in all effort levels, faster encoding, faster transcoding to BC7, but lower quality) + bool_param m_xuastc_ldr_force_disable_rgb_dual_plane; // disable RGB dual plane usage (still can use dual plane on alpha blocks), for faster transcoding to BC7 but lower quality + param m_xuastc_ldr_syntax; // favor faster decompression over ratio, default is basist::astc_ldr_t::xuastc_ldr_syntax::cFullZstd (fastest transcoding but lower ratio) + uint32_t m_xuastc_ldr_channel_weights[4]; + bool_param m_xuastc_ldr_blurring; // experimental, not recommended, very slow + + // XUASTC Lossy supercompression PSNR threshold parameters + param m_ls_min_psnr, m_ls_min_alpha_psnr; + param m_ls_thresh_psnr, m_ls_thresh_alpha_psnr; + param m_ls_thresh_edge_psnr, m_ls_thresh_edge_alpha_psnr; + + // Job pool, MUST not be nullptr; + job_pool *m_pJob_pool; + + // Returns the current format mode as set by set_format_mode() above. + // Because of backwards API compatibility we don't use this directly yet, it's just here to aid the transition to the new API. + basist::basis_tex_format get_format_mode() const { return m_format_mode; } + + private: + // This is set by set_format_mode() above. For backwards API compat we don't use it directly, it's just here to aid the transition to the new API. + basist::basis_tex_format m_format_mode; + }; + + // Important: basisu_encoder_init() MUST be called first before using this class. 
+ class basis_compressor + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basis_compressor); + + public: + basis_compressor(); + ~basis_compressor(); + + // Note it *should* be possible to call init() multiple times with different inputs, but this scenario isn't well tested. Ideally, create 1 object, compress, then delete it. + bool init(const basis_compressor_params ¶ms); + + enum error_code + { + cECSuccess = 0, + cECFailedInitializing, + cECFailedReadingSourceImages, + cECFailedValidating, + cECFailedEncodeUASTC, + cECFailedFrontEnd, + cECFailedFontendExtract, + cECFailedBackend, + cECFailedCreateBasisFile, + cECFailedWritingOutput, + cECFailedUASTCRDOPostProcess, + cECFailedCreateKTX2File, + cECFailedInvalidParameters + }; + + error_code process(); + + // The output .basis file will always be valid of process() succeeded. + const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } + + // The output .ktx2 file will only be valid if m_create_ktx2_file was true and process() succeeded. + const uint8_vec& get_output_ktx2_file() const { return m_output_ktx2_file; } + + const basisu::vector &get_stats() const { return m_stats; } + + // Sum of all slice orig pixels. Intended for statistics display. 
+ uint64_t get_total_slice_orig_texels() const { return m_total_slice_orig_texels; } + + uint64_t get_basis_file_size() const { return m_basis_file_size; } + double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } + + uint64_t get_ktx2_file_size() const { return m_ktx2_file_size; } + double get_ktx2_bits_per_texel() const { return m_ktx2_bits_per_texel; } + + bool get_any_source_image_has_alpha() const { return m_any_source_image_has_alpha; } + + bool get_opencl_failed() const { return m_opencl_failed; } + + private: + basis_compressor_params m_params; + + opencl_context_ptr m_pOpenCL_context; + + // the output mode/codec + basist::basis_tex_format m_fmt_mode; + + // the output mode/codec's block width/height + uint32_t m_fmt_mode_block_width; + uint32_t m_fmt_mode_block_height; + + // Note these images are expanded if necessary (duplicating cols/rows) to account for block dimensions. + basisu::vector m_slice_images; + basisu::vector m_slice_images_hdr; + + basisu::vector m_stats; + + uint64_t m_total_slice_orig_texels; + + uint64_t m_basis_file_size; + double m_basis_bits_per_texel; + + uint64_t m_ktx2_file_size; + double m_ktx2_bits_per_texel; + + basisu_backend_slice_desc_vec m_slice_descs; + + uint32_t m_total_blocks; + + basisu_frontend m_frontend; + + // These are 4x4 blocks. 
+ pixel_block_vec m_source_blocks; + pixel_block_hdr_vec m_source_blocks_hdr; + + basisu::vector m_frontend_output_textures; + + basisu::vector m_best_etc1s_images; + basisu::vector m_best_etc1s_images_unpacked; + + basisu_backend m_backend; + + basisu_file m_basis_file; + + basisu::vector m_decoded_output_textures; // BC6H in HDR mode + basisu::vector m_decoded_output_textures_unpacked; + + basisu::vector m_decoded_output_textures_bc7; + basisu::vector m_decoded_output_textures_unpacked_bc7; + + basisu::vector m_decoded_output_textures_bc6h_hdr_unpacked; // BC6H in HDR mode + + basisu::vector m_decoded_output_textures_astc_hdr; + basisu::vector m_decoded_output_textures_astc_hdr_unpacked; + + uint8_vec m_output_basis_file; + uint8_vec m_output_ktx2_file; + + basisu::vector m_uastc_slice_textures; + basisu_backend_output m_uastc_backend_output; + + // The amount the HDR input has to be scaled up in case it had to be rescaled to fit into half floats. + float m_hdr_image_scale; + + // The upconversion multiplier used to load LDR images in HDR mode. + float m_ldr_to_hdr_upconversion_nit_multiplier; + + // True if any loaded source images were LDR and upconverted to HDR. 
+ bool m_upconverted_any_ldr_images; + + bool m_any_source_image_has_alpha; + + bool m_opencl_failed; + + void check_for_hdr_inputs(); + bool sanity_check_input_params(); + void clean_hdr_image(imagef& src_img); + bool read_dds_source_images(); + bool read_source_images(); + bool extract_source_blocks(); + bool process_frontend(); + bool extract_frontend_texture_data(); + bool process_backend(); + bool create_basis_file_and_transcode(); + bool write_hdr_debug_images(const char* pBasename, const imagef& img, uint32_t width, uint32_t height); + bool write_output_files_and_compute_stats(); + error_code encode_slices_to_astc_6x6_hdr(); + error_code encode_slices_to_uastc_4x4_hdr(); + error_code encode_slices_to_uastc_4x4_ldr(); + error_code encode_slices_to_xuastc_or_astc_ldr(); + bool generate_mipmaps(const imagef& img, basisu::vector& mips, bool has_alpha); + bool generate_mipmaps(const image &img, basisu::vector &mips, bool has_alpha); + bool validate_texture_type_constraints(); + bool validate_ktx2_constraints(); + bool get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr); + bool create_ktx2_file(); + bool pick_format_mode(); + + uint32_t get_block_width() const { return m_fmt_mode_block_width; } + uint32_t get_block_height() const { return m_fmt_mode_block_height; } + }; + + // Alternative simple C-style wrapper API around the basis_compressor class. + // This doesn't expose every encoder feature, but it's enough to get going. + // Important: basisu_encoder_init() MUST be called first before calling these functions. + // + // Input parameters: + // source_images: Array of "image" objects, one per mipmap level, largest mipmap level first. + // OR + // pImageRGBA: pointer to a 32-bpp RGBx or RGBA raster image, R first in memory, A last. Top scanline first in memory. + // width/height/pitch_in_pixels: dimensions of pImageRGBA + // + // flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC quality or effort level. 
+ // Note: basis_compress2() variants below accept the new-style "quality_level" (0-100) and "effort_level" (0-10) parameters instead of packing them into flags_and_quality. + // In ETC1S mode, the lower 8-bits are the ETC1S quality level which ranges from [1,255] (higher=better quality/larger files) + // In UASTC LDR 4x4 mode, the lower 8-bits are the UASTC LDR/HDR pack or effort level (see cPackUASTCLevelFastest to cPackUASTCLevelVerySlow). Fastest/lowest quality is 0, so be sure to set it correctly. Valid values are [0,4] for both LDR/HDR. + // In UASTC HDR 4x4 mode, the lower 8-bits are the codec's effort level. Valid range is [uastc_hdr_4x4_codec_options::cMinLevel, uastc_hdr_4x4_codec_options::cMaxLevel]. Higher=better quality, but slower. + // In RDO ASTC HDR 6x6/UASTC HDR 6x6 mode, the lower 8-bits are the codec's effort level. Valid range is [0,astc_6x6_hdr::ASTC_HDR_6X6_MAX_USER_COMP_LEVEL]. Higher levels=better quality, but slower. + // In XUASTC/ASTC LDR 4x4-12x12 mode, the lower 8-bits are the compressor's effort level from [0,10] (astc_ldr_t::EFFORT_LEVEL_MIN, astc_ldr_t::EFFORT_LEVEL_MAX). + // + // float uastc_rdo_or_dct_quality: + // UASTC LDR 4x4 RDO quality level: RDO lambda setting - 0=no change/highest quality. Higher values lower quality but increase compressibility, initially try .5-1.5. + // RDO ASTC 6x6 HDR/UASTC 6x6 HDR: RDO lambda setting. 0=no change/highest quality. Higher values lower quality but increase compressibility, initially try 250-2000 (HDR) or 1000-10000 (LDR/SDR inputs upconverted to HDR). + // In XUASTC/ASTC LDR 4x4-12x12 mode, this is the [1,100] weight grid DCT quality level. + // + // pSize: Returns the output data's compressed size in bytes + // + // Return value is the compressed .basis or .ktx2 file data, or nullptr on failure. Must call basis_free() to free it. 
+ enum + { + cFlagUseOpenCL = 1 << 8, // use OpenCL if available + cFlagThreaded = 1 << 9, // use multiple threads for compression + cFlagDebug = 1 << 10, // enable debug output + + cFlagKTX2 = 1 << 11, // generate a KTX2 file + cFlagKTX2UASTCSuperCompression = 1 << 12, // use KTX2 Zstd supercompression on non-supercompressed formats that support it. + + cFlagSRGB = 1 << 13, // input texture is sRGB, use perceptual colorspace metrics, also use sRGB filtering during mipmap gen, and also sets KTX2/.basis output transfer func to sRGB + cFlagGenMipsClamp = 1 << 14, // generate mipmaps with clamp addressing + cFlagGenMipsWrap = 1 << 15, // generate mipmaps with wrap addressing + + cFlagYFlip = 1 << 16, // flip source image on Y axis before compression + + // Note 11/18/2025: cFlagUASTCRDO flag is now ignored. Now if uastc_rdo_or_dct_quality>0 in UASTC LDR 4x4 mode, you automatically get RDO. + //cFlagUASTCRDO = 1 << 17, // use RDO postprocessing when generating UASTC LDR 4x4 files (must set uastc_rdo_or_dct_quality to the quality scalar) + + cFlagPrintStats = 1 << 18, // print image stats to stdout + cFlagPrintStatus = 1 << 19, // print status to stdout + + cFlagDebugImages = 1 << 20, // enable debug image generation (for development, slower) + + cFlagREC2020 = 1 << 21, // treat input as REC 2020 vs. the default 709 (for codecs that support this, currently UASTC HDR and ASTC 6x6), bit is always placed into KTX2 DFD + + cFlagValidateOutput = 1 << 22, // transcode the output after encoding for testing + + // XUASTC LDR profile: full arith, hybrid or full zstd (see basist::astc_ldr_t::xuastc_ldr_syntax) + cFlagXUASTCLDRSyntaxFullArith = 0 << 23, + cFlagXUASTCLDRSyntaxHybrid = 1 << 23, + cFlagXUASTCLDRSyntaxFullZStd = 2 << 23, + + cFlagXUASTCLDRSyntaxShift = 23, + cFlagXUASTCLDRSyntaxMask = 3, + + // Texture Type: 2D, 2D Array, Cubemap Array, or Texture Video (see enum basis_texture_type). Defaults to plain 2D. 
+ cFlagTextureType2D = 0 << 25, + cFlagTextureType2DArray = 1 << 25, + cFlagTextureTypeCubemapArray = 2 << 25, + cFlagTextureTypeVideoFrames = 3 << 25, + + cFlagTextureTypeShift = 25, + cFlagTextureTypeMask = 3, + }; + + void* basis_compress_internal( + basist::basis_tex_format mode, + const basisu::vector* pSource_images, + const basisu::vector* pSource_images_hdr, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats, + int quality_level = -1, int effort_level = -1); + + // This function accepts an array of source images. + // If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled. + // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. + // Important: The returned block MUST be manually freed using basis_free_data(). + // basisu_encoder_init() MUST be called first! + // LDR version. + void* basis_compress( + basist::basis_tex_format mode, + const basisu::vector &source_images, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats = nullptr); + + // HDR-only version. + // Important: The returned block MUST be manually freed using basis_free_data(). + void* basis_compress( + basist::basis_tex_format mode, + const basisu::vector& source_images_hdr, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats = nullptr); + + // This function only accepts a single LDR source image. It's just a wrapper for basis_compress() above. + // Important: The returned block MUST be manually freed using basis_free_data(). 
+ void* basis_compress( + basist::basis_tex_format mode, + const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats = nullptr); + + // basis_compress2() variants accept the new unified quality_level and effort_level parameters instead of the old flags/float uastc_rdo_or_dct_quality parameter. + // quality_level must be [0,100], effort_level [0,10]. + void* basis_compress2( + basist::basis_tex_format mode, + const basisu::vector& source_images, + uint32_t flags_and_quality, int quality_level, int effort_level, + size_t* pSize, + image_stats* pStats = nullptr); + + void* basis_compress2( + basist::basis_tex_format mode, + const basisu::vector& source_images_hdr, + uint32_t flags_and_quality, int quality_level, int effort_level, + size_t* pSize, + image_stats* pStats = nullptr); + + void* basis_compress2( + basist::basis_tex_format mode, + const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, + uint32_t flags_and_quality, int quality_level, int effort_level, + size_t* pSize, + image_stats* pStats = nullptr); + + // Frees the dynamically allocated file data returned by basis_compress(). + // This MUST be called on the pointer returned by basis_compress() when you're done with it. + void basis_free_data(void* p); + + // Runs a short benchmark using synthetic image data to time OpenCL encoding vs. CPU encoding, with multithreading enabled. + // Returns true if opencl is worth using on this system, otherwise false. + // If pOpenCL_failed is not null, it will be set to true if OpenCL encoding failed *on this particular machine/driver/BasisU version* and the encoder falled back to CPU encoding. + // basisu_encoder_init() MUST be called first. If OpenCL support wasn't enabled this always returns false. 
+ bool basis_benchmark_etc1s_opencl(bool *pOpenCL_failed = nullptr); + + // Parallel compression API + struct parallel_results + { + double m_total_time; + basis_compressor::error_code m_error_code; + uint8_vec m_basis_file; + uint8_vec m_ktx2_file; + basisu::vector m_stats; + double m_basis_bits_per_texel; + bool m_any_source_image_has_alpha; + + parallel_results() + { + clear(); + } + + void clear() + { + m_total_time = 0.0f; + m_error_code = basis_compressor::cECFailedInitializing; + m_basis_file.clear(); + m_ktx2_file.clear(); + m_stats.clear(); + m_basis_bits_per_texel = 0.0f; + m_any_source_image_has_alpha = false; + } + }; + + // Compresses an array of input textures across total_threads threads using the basis_compressor class. + // Compressing multiple textures at a time is substantially more efficient than just compressing one at a time. + // total_threads must be >= 1. + bool basis_parallel_compress( + uint32_t total_threads, + const basisu::vector ¶ms_vec, + basisu::vector< parallel_results > &results_vec); + +} // namespace basisu + diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_enc.h b/Source/ThirdParty/basis_universal/encoder/basisu_enc.h new file mode 100644 index 000000000..d0b77415e --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_enc.h @@ -0,0 +1,4398 @@ +// basisu_enc.h +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "../transcoder/basisu.h" +#include "../transcoder/basisu_transcoder_internal.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(_WIN32) || defined(__MINGW32__) +#include +#endif + +// This module is really just a huge grab bag of classes and helper functions needed by the encoder. + +#if BASISU_SUPPORT_SSE +// Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors. +extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16); +#endif + +namespace basisu +{ + extern uint8_t g_hamming_dist[256]; + extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; + extern float g_srgb_to_linear_table[256]; // sRGB EOTF->linear light [0,1], 1=~100 nits + + // true if basisu_encoder_init() has been called and returned. + extern bool g_library_initialized; + + // Encoder library initialization. + // This function MUST be called before encoding anything! + // Returns false if library initialization fails. + bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); + void basisu_encoder_deinit(); + + // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 + extern void detect_sse41(); + +#if BASISU_SUPPORT_SSE + extern bool g_cpu_supports_sse41; +#else + const bool g_cpu_supports_sse41 = false; +#endif + + void error_vprintf(const char* pFmt, va_list args); + void error_printf(const char *pFmt, ...); + + template + inline void fmt_error_printf(const char* pFmt, Args&&... args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... 
})) + return; + error_printf("%s", res.c_str()); + } + + void platform_sleep(uint32_t ms); + + // Helpers + + inline uint8_t clamp255(int32_t i) + { + return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); + } + + inline int left_shift32(int val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return static_cast(static_cast(val) << shift); + } + + inline uint32_t left_shift32(uint32_t val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return val << shift; + } + + inline int32_t clampi(int32_t value, int32_t low, int32_t high) + { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; + } + + inline uint8_t mul_8(uint32_t v, uint32_t a) + { + v = v * a + 128; + return (uint8_t)((v + (v >> 8)) >> 8); + } + + inline int fast_roundf_pos_int(float x) + { + assert(x >= 0.0f); + return (int)(x + 0.5f); + } + + inline int fast_roundf_int(float x) + { + return (x >= 0.0f) ? (int)(x + 0.5f) : (int)(x - 0.5f); + } + + inline int fast_floorf_int(float x) + { + int xi = (int)x; // Truncate towards zero + return ((x < 0.0f) && (x != (float)xi)) ? 
(xi - 1) : xi; + } + + inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 64); + uint64_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= ((uint64_t)(byte_bits) << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 32); + uint32_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= (byte_bits << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + // Open interval + inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + + // Closed interval + inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + + inline bool equal_rel_tol(float a, float b, float rel_tol) + { + float diff = std::fabs(a - b); + float max_abs = std::max(std::fabs(a), std::fabs(b)); + return diff <= (max_abs * rel_tol); + } + + inline bool equal_rel_tol(double a, double b, double rel_tol) + { + double diff = 
std::fabs(a - b); + double max_abs = std::max(std::fabs(a), std::fabs(b)); + return diff <= (max_abs * rel_tol); + } + + inline uint32_t clz(uint32_t x) + { + if (!x) + return 32; + + uint32_t n = 0; + while ((x & 0x80000000) == 0) + { + x <<= 1u; + n++; + } + + return n; + } + + bool string_begins_with(const std::string& str, const char* pPhrase); + + // Case sensitive, returns -1 if can't find + inline int string_find_first(const std::string& str, const char* pPhrase) + { + size_t res = str.find(pPhrase, 0); + if (res == std::string::npos) + return -1; + return (int)res; + } + + // Hashing + + inline uint32_t bitmix32c(uint32_t v) + { + v = (v + 0x7ed55d16) + (v << 12); + v = (v ^ 0xc761c23c) ^ (v >> 19); + v = (v + 0x165667b1) + (v << 5); + v = (v + 0xd3a2646c) ^ (v << 9); + v = (v + 0xfd7046c5) + (v << 3); + v = (v ^ 0xb55a4f09) ^ (v >> 16); + return v; + } + + inline uint32_t bitmix32(uint32_t v) + { + v -= (v << 6); + v ^= (v >> 17); + v -= (v << 9); + v ^= (v << 4); + v -= (v << 3); + v ^= (v << 10); + v ^= (v >> 15); + return v; + } + + inline uint32_t wang_hash(uint32_t seed) + { + seed = (seed ^ 61) ^ (seed >> 16); + seed *= 9; + seed = seed ^ (seed >> 4); + seed *= 0x27d4eb2d; + seed = seed ^ (seed >> 15); + return seed; + } + + class running_stat + { + public: + running_stat() { clear(); } + + void clear() + { + m_n = 0; + m_total = 0; + m_old_m = 0; + m_new_m = 0; + m_old_s = 0; + m_new_s = 0; + m_min = 0; + m_max = 0; + } + + void push(double x) + { + m_n++; + m_total += x; + if (m_n == 1) + { + m_old_m = m_new_m = x; + m_old_s = 0.0; + m_min = x; + m_max = x; + } + else + { + // See Knuth TAOCP vol 2, 3rd edition, page 232 + m_new_m = m_old_m + (x - m_old_m) / m_n; + m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m); + m_old_m = m_new_m; + m_old_s = m_new_s; + m_min = basisu::minimum(x, m_min); + m_max = basisu::maximum(x, m_max); + } + } + + uint32_t get_num() const + { + return m_n; + } + + double get_total() const + { + return m_total; + } + + 
double get_mean() const + { + return (m_n > 0) ? m_new_m : 0.0; + } + + // Returns sample variance + double get_variance() const + { + return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0); + } + + double get_std_dev() const + { + return sqrt(get_variance()); + } + + double get_min() const + { + return m_min; + } + + double get_max() const + { + return m_max; + } + + private: + uint32_t m_n; + double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max; + }; + + // Linear algebra + + template + class vec + { + protected: + T m_v[N]; + + public: + enum { num_elements = N }; + typedef T scalar_type; + + inline vec() { } + inline vec(eZero) { set_zero(); } + + explicit inline vec(T val) { set(val); } + inline vec(T v0, T v1) { set(v0, v1); } + inline vec(T v0, T v1, T v2) { set(v0, v1, v2); } + inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); } + inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; } + template inline vec(const vec &other) { set(other); } + + inline const T& operator[](uint32_t i) const { assert(i < N); return m_v[i]; } + inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; } + + inline T getX() const { return m_v[0]; } + inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; } + inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; } + inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; } + + inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; } + inline bool operator!=(const vec& rhs) const { return !(*this == rhs); } + inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } + + inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } + inline void clear() { set_zero(); } + + template + inline vec &set(const 
vec &other) + { + uint32_t i; + if ((const void *)(&other) == (const void *)(this)) + return *this; + const uint32_t m = minimum(OtherN, N); + for (i = 0; i < m; i++) + m_v[i] = static_cast(other[i]); + for (; i < N; i++) + m_v[i] = 0; + return *this; + } + + inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; } + inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; } + inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; } + + inline vec &set(T v0, T v1) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + clear_elements(2, N); + } + return *this; + } + + inline vec &set(T v0, T v1, T v2) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + clear_elements(3, N); + } + } + return *this; + } + + inline vec &set(T v0, T v1, T v2, T v3) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + + if (N >= 4) + { + m_v[3] = v3; + clear_elements(5, N); + } + } + } + return *this; + } + + inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; } + template inline vec &operator=(const vec &rhs) { set(rhs); return *this; } + + inline const T *get_ptr() const { return reinterpret_cast(&m_v[0]); } + inline T *get_ptr() { return reinterpret_cast(&m_v[0]); } + + inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } + inline vec operator+ () const { return *this; } + inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } + inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; } + inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; } + inline vec &operator*=(const vec 
&other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } + inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } + inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } + + friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } + friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } + friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } + friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } + friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } + friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } + + static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } + static inline T dot_product3(const vec& lhs, const vec& rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < minimum(3u, N); i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } + + inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } + inline T dot3(const vec& rhs) const { return dot_product3(*this, rhs); } + + inline T norm() const { return dot_product(*this, *this); } + inline T length() const { return sqrt(norm()); } + + inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; } + inline double 
squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; } + + inline T distance(const vec &other) const { return static_cast(sqrt(squared_distance(other))); } + inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); } + + inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } + + inline vec get_normalized() const { vec res(*this); res.normalize_in_place(); return res; } + + inline vec &clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + m_v[i] = basisu::clamp(m_v[i], l, h); + return *this; + } + + static vec component_mul(const vec& a, const vec& b) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = a[i] * b[i]; + return res; + } + + static vec component_min(const vec& a, const vec& b) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = minimum(a[i], b[i]); + return res; + } + + static vec component_max(const vec& a, const vec& b) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = maximum(a[i], b[i]); + return res; + } + + static vec lerp(const vec& a, const vec& b, float s) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = basisu::lerp(a[i], b[i], s); + return res; + } + }; + + typedef vec<4, double> vec4D; + typedef vec<3, double> vec3D; + typedef vec<2, double> vec2D; + typedef vec<1, double> vec1D; + + typedef vec<6, float> vec6F; + typedef vec<5, float> vec5F; + typedef vec<4, float> vec4F; + typedef vec<3, float> vec3F; + typedef vec<2, float> vec2F; + typedef vec<1, float> vec1F; + + typedef vec<16, float> vec16F; + + template struct bitwise_copyable< vec > { enum { cFlag = true }; }; + template struct bitwise_movable< vec > { enum { cFlag = true }; }; + + template + class matrix + { + public: + typedef vec col_vec; + typedef vec row_vec; + + typedef T scalar_type; + + enum { rows = Rows, cols = Cols }; + + 
protected: + row_vec m_r[Rows]; + + public: + inline matrix() {} + inline matrix(eZero) { set_zero(); } + inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; } + inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; } + + inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } + inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } + + inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; } + inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; } + + inline matrix &set_zero() + { + for (uint32_t i = 0; i < Rows; i++) + m_r[i].set_zero(); + return *this; + } + + inline matrix &set_identity() + { + for (uint32_t i = 0; i < Rows; i++) + { + m_r[i].set_zero(); + if (i < Cols) + m_r[i][i] = 1.0f; + } + return *this; + } + }; + + template struct bitwise_copyable< matrix > { enum { cFlag = true }; }; + template struct bitwise_movable< matrix > { enum { cFlag = true }; }; + + template + inline VectorType compute_pca_from_covar(matrix &cmatrix) + { + VectorType axis; + if (N == 1) + axis.set(1.0f); + else + { + for (uint32_t i = 0; i < N; i++) + axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum(N - 1, 1))); + } + + VectorType prev_axis(axis); + + // Power iterations + for (uint32_t power_iter = 0; power_iter < 8; power_iter++) + { + VectorType trial_axis; + double max_sum = 0; + + for (uint32_t i = 0; i < N; i++) + { + double sum = 0; + for (uint32_t j = 0; j < N; j++) + sum += cmatrix[i][j] * axis[j]; + + trial_axis[i] = static_cast(sum); + + max_sum = maximum(fabs(sum), max_sum); + } + + if (max_sum != 0.0f) + trial_axis *= static_cast(1.0f / max_sum); + + VectorType delta_axis(prev_axis - trial_axis); + + prev_axis = axis; + axis = trial_axis; + + if (delta_axis.norm() < .0024f) + break; + } + + return 
axis.normalize_in_place(); + } + + template inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys) + { + for (uint32_t i = 0; i < num_indices; i++) + pIndices[i] = i; + + std::sort( + pIndices, + pIndices + num_indices, + [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; } + ); + } + + // 1-4 byte direct Radix sort. + template + T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size) + { + assert(key_ofs < sizeof(T)); + assert((key_size >= 1) && (key_size <= 4)); + + uint32_t hist[256 * 4]; + + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + +#define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs)) + + if (key_size == 4) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for (; p != q; p++) + { + const uint32_t key = BASISU_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for (; p != q; p++) + { + const uint32_t key = BASISU_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + const uint32_t key0 = BASISU_GET_KEY(p); + const uint32_t key1 = BASISU_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_vals & 1) + { + const uint32_t key = BASISU_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + assert(key_size == 1); + if (key_size != 1) + return NULL; + + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + const uint32_t key0 = BASISU_GET_KEY(p); + const uint32_t key1 = BASISU_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + 
} + + if (num_vals & 1) + { + const uint32_t key = BASISU_GET_KEY(p); + hist[key & 0xFF]++; + } + } + + T* pCur = pBuf0; + T* pNew = pBuf1; + + for (uint32_t pass = 0; pass < key_size; pass++) + { + const uint32_t* pHist = &hist[pass << 8]; + + uint32_t offsets[256]; + + uint32_t cur_ofs = 0; + for (uint32_t i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i + 1] = cur_ofs; + cur_ofs += pHist[i + 1]; + } + + const uint32_t pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; + uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF; + + if (c0 == c1) + { + uint32_t dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset0 + 1] = p[1]; + } + else + { + uint32_t dst_offset0 = offsets[c0]++; + uint32_t dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset1] = p[1]; + } + } + + if (num_vals & 1) + { + uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; + + uint32_t dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = *p; + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; + } + +#undef BASISU_GET_KEY + + // Very simple job pool with no dependencies. + class job_pool + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool); + + public: + // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. 
+ job_pool(uint32_t num_threads); + ~job_pool(); + + void add_job(const std::function& job); + void add_job(std::function&& job); + + void wait_for_all(); + + size_t get_total_threads() const { return 1 + m_threads.size(); } + + private: + std::vector m_threads; + std::vector > m_queue; + + std::mutex m_mutex; + std::condition_variable m_has_work; + std::condition_variable m_no_more_jobs; + + uint32_t m_num_active_jobs; + + std::atomic m_kill_flag; + + std::atomic m_num_active_workers; + + void job_thread(uint32_t index); + }; + + // Simple 64-bit color class + + class color_rgba_i16 + { + public: + union + { + int16_t m_comps[4]; + + struct + { + int16_t r; + int16_t g; + int16_t b; + int16_t a; + }; + }; + + inline color_rgba_i16() + { + static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4"); + } + + inline color_rgba_i16(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba_i16 &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = (int16_t)clamp(sr, INT16_MIN, INT16_MAX); + m_comps[1] = (int16_t)clamp(sg, INT16_MIN, INT16_MAX); + m_comps[2] = (int16_t)clamp(sb, INT16_MIN, INT16_MAX); + m_comps[3] = (int16_t)clamp(sa, INT16_MIN, INT16_MAX); + return *this; + } + }; + + class color_rgba + { + public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() + { + static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)"); + } + + // Not too hot about this idea. 
+ inline color_rgba(const basist::color32& other) : + r(other.r), + g(other.g), + b(other.b), + a(other.a) + { + } + + color_rgba& operator= (const basist::color32& rhs) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + a = rhs.a; + return *this; + } + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + { + set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + } + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba &set(int y) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba &set(int y, int na) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(clamp(na, 0, 255)); + return *this; + } + + inline color_rgba &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + m_comps[3] = static_cast(clamp(sa, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(const color_rgba &other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t 
&operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } + inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba &rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba &rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba &rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_luma(bool luma_601) const { return luma_601 ? 
get_601_luma() : get_709_luma(); } + + inline uint32_t get_bgra_uint32() const { return b | (g << 8) | (r << 16) | (a << 24); } + inline uint32_t get_rgba_uint32() const { return r | (g << 8) | (b << 16) | (a << 24); } + + inline basist::color32 get_color32() const + { + return basist::color32(r, g, b, a); + } + + static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } + }; + + typedef basisu::vector color_rgba_vec; + + const color_rgba g_black_color(0, 0, 0, 255); + const color_rgba g_black_trans_color(0, 0, 0, 0); + const color_rgba g_white_color(255, 255, 255, 255); + + inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) + { + int dr = r0 - r1, dg = g0 - g1, db = b0 - b1; + return dr * dr + dg * dg + db * db; + } + + inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1) + { + int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1; + return dr * dr + dg * dg + db * db + da * da; + } + + inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha) + { + if (alpha) + return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); + else + return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); + } + + // Original library color_distance(), for testing + inline uint32_t color_distance_orig(bool perceptual, const color_rgba& e1, const color_rgba& e2, bool alpha) + { + if (perceptual) + { + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + + int64_t delta_l = dr * 27 + dg * 92 + db * 9; + int64_t delta_cr = dr * 128 - delta_l; + int64_t delta_cb = db * 128 - delta_l; + + uint32_t id = 
((uint32_t)((delta_l * delta_l) >> 7U)) + + ((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) + + ((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U); + + if (alpha) + { + int da = (e1.a - e2.a) << 7; + // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring. + id += ((uint32_t)(da * da) >> 7U); + } + + return id; + } + else + { + return color_distance(e1, e2, alpha); + } + } + + inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) + { + if (perceptual) + { + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + + // This calc can't overflow or the SSE variants will overflow too. + int delta_l = dr * 14 + dg * 45 + db * 5; + int delta_cr = dr * 64 - delta_l; + int delta_cb = db * 64 - delta_l; + + // not >> 6, so the output is scaled by 7 bits, not 6 (to match the original function which scaled by 7, but had rare overflow issues) + uint32_t id = ((uint32_t)(delta_l * delta_l) >> 5U) + + ((((uint32_t)(delta_cr * delta_cr) >> 5U) * 26U) >> 7U) + + ((((uint32_t)(delta_cb * delta_cb) >> 5U) * 3U) >> 7U); + +#if defined(DEBUG) || defined(_DEBUG) + // Shouldn't need 64-bit now, but make sure + { + int64_t alt_delta_l = dr * 14 + dg * 45 + db * 5; + int64_t alt_delta_cr = dr * 64 - alt_delta_l; + int64_t alt_delta_cb = db * 64 - alt_delta_l; + + int64_t alt_id = ((alt_delta_l * alt_delta_l) >> 5) + + ((((alt_delta_cr * alt_delta_cr) >> 5) * 26) >> 7) + + ((((alt_delta_cb * alt_delta_cb) >> 5) * 3) >> 7); + + assert(alt_id == id); + } +#endif + + if (alpha) + { + int da = (e1.a - e2.a) << 7; + + // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring. 
+ uint32_t ea = ((uint32_t)(da * da) >> 7U); + id += ea; + +#if defined(DEBUG) || defined(_DEBUG) + // Make sure it can't overflow + assert((((int64_t)da * (int64_t)da) >> 7) == ea); +#endif + + } + + return id; + } + else + { + return color_distance(e1, e2, alpha); + } + } + + static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b) + { + const int dl = a.r - b.r; + const int da = a.a - b.a; + return dl * dl + da * da; + } + + // String helpers + + inline int string_find_right(const std::string& filename, char c) + { + size_t result = filename.find_last_of(c); + return (result == std::string::npos) ? -1 : (int)result; + } + + inline std::string string_get_extension(const std::string &filename) + { + int sep = -1; +#ifdef _WIN32 + sep = string_find_right(filename, '\\'); +#endif + if (sep < 0) + sep = string_find_right(filename, '/'); + + int dot = string_find_right(filename, '.'); + if (dot <= sep) + return ""; + + std::string result(filename); + result.erase(0, dot + 1); + + return result; + } + + inline bool string_remove_extension(std::string &filename) + { + int sep = -1; +#ifdef _WIN32 + sep = string_find_right(filename, '\\'); +#endif + if (sep < 0) + sep = string_find_right(filename, '/'); + + int dot = string_find_right(filename, '.'); + if ((dot < sep) || (dot < 0)) + return false; + + filename.resize(dot); + + return true; + } + + inline std::string string_tolower(const std::string& s) + { + std::string result(s); + for (size_t i = 0; i < result.size(); i++) + { + result[i] = (char)tolower((uint8_t)(result[i])); + } + return result; + } + + inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc) + { + assert(pDst && pSrc && dst_len); + if (!dst_len) + return pDst; + + const size_t src_len = strlen(pSrc); + const size_t src_len_plus_terminator = src_len + 1; + + if (src_len_plus_terminator <= dst_len) + memcpy(pDst, pSrc, src_len_plus_terminator); + else + { + if (dst_len > 1) + memcpy(pDst, pSrc, dst_len - 1); + 
pDst[dst_len - 1] = '\0'; + } + + return pDst; + } + + inline bool string_ends_with(const std::string& s, char c) + { + return (s.size() != 0) && (s.back() == c); + } + + inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt) + { +#ifdef _MSC_VER + char drive_buf[_MAX_DRIVE] = { 0 }; + char dir_buf[_MAX_DIR] = { 0 }; + char fname_buf[_MAX_FNAME] = { 0 }; + char ext_buf[_MAX_EXT] = { 0 }; + + errno_t error = _splitpath_s(p, + pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, + pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, + pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, + pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); + if (error != 0) + return false; + + if (pDrive) *pDrive = drive_buf; + if (pDir) *pDir = dir_buf; + if (pFilename) *pFilename = fname_buf; + if (pExt) *pExt = ext_buf; + return true; +#else + char dirtmp[1024], nametmp[1024]; + strcpy_safe(dirtmp, sizeof(dirtmp), p); + strcpy_safe(nametmp, sizeof(nametmp), p); + + if (pDrive) + pDrive->resize(0); + + const char *pDirName = dirname(dirtmp); + const char* pBaseName = basename(nametmp); + if ((!pDirName) || (!pBaseName)) + return false; + + if (pDir) + { + *pDir = pDirName; + if ((pDir->size()) && (pDir->back() != '/')) + *pDir += "/"; + } + + if (pFilename) + { + *pFilename = pBaseName; + string_remove_extension(*pFilename); + } + + if (pExt) + { + *pExt = pBaseName; + *pExt = string_get_extension(*pExt); + if (pExt->size()) + *pExt = "." 
+ *pExt; + } + + return true; +#endif + } + + inline bool is_path_separator(char c) + { +#ifdef _WIN32 + return (c == '/') || (c == '\\'); +#else + return (c == '/'); +#endif + } + + inline bool is_drive_separator(char c) + { +#ifdef _WIN32 + return (c == ':'); +#else + (void)c; + return false; +#endif + } + + inline void string_combine_path(std::string &dst, const char *p, const char *q) + { + std::string temp(p); + if (temp.size() && !is_path_separator(q[0])) + { + if (!is_path_separator(temp.back())) + temp.append(1, BASISU_PATH_SEPERATOR_CHAR); + } + temp += q; + dst.swap(temp); + } + + inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r) + { + string_combine_path(dst, p, q); + string_combine_path(dst, dst.c_str(), r); + } + + inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) + { + string_combine_path(dst, p, q, r); + if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.')) + dst.append(1, '.'); + dst.append(pExt); + } + + inline bool string_get_pathname(const char *p, std::string &path) + { + std::string temp_drive, temp_path; + if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL)) + return false; + string_combine_path(path, temp_drive.c_str(), temp_path.c_str()); + return true; + } + + inline bool string_get_filename(const char *p, std::string &filename) + { + std::string temp_ext; + if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext)) + return false; + filename += temp_ext; + return true; + } + + class rand + { + std::mt19937 m_mt; + + public: + rand() { } + + rand(uint32_t s) { seed(s); } + void seed(uint32_t s) { m_mt.seed(s); } + + // between [l,h] + int irand(int l, int h) { std::uniform_int_distribution d(l, h); return d(m_mt); } + + uint32_t urand32() { return static_cast(irand(INT32_MIN, INT32_MAX)); } + + bool bit() { return irand(0, 1) == 1; } + + uint8_t byte() { return static_cast(urand32()); } + 
+ // between [l,h) + float frand(float l, float h) { std::uniform_real_distribution d(l, h); return d(m_mt); } + + float gaussian(float mean, float stddev) { std::normal_distribution d(mean, stddev); return d(m_mt); } + }; + + class priority_queue + { + public: + priority_queue() : + m_size(0) + { + } + + void clear() + { + m_heap.clear(); + m_size = 0; + } + + void init(uint32_t max_entries, uint32_t first_index, float first_priority) + { + m_heap.resize(max_entries + 1); + m_heap[1].m_index = first_index; + m_heap[1].m_priority = first_priority; + m_size = 1; + } + + inline uint32_t size() const { return m_size; } + + inline uint32_t get_top_index() const { return m_heap[1].m_index; } + inline float get_top_priority() const { return m_heap[1].m_priority; } + + inline void delete_top() + { + assert(m_size > 0); + m_heap[1] = m_heap[m_size]; + m_size--; + if (m_size) + down_heap(1); + } + + inline void add_heap(uint32_t index, float priority) + { + m_size++; + + uint32_t k = m_size; + + if (m_size >= m_heap.size()) + m_heap.resize(m_size + 1); + + for (;;) + { + uint32_t parent_index = k >> 1; + if ((!parent_index) || (m_heap[parent_index].m_priority > priority)) + break; + m_heap[k] = m_heap[parent_index]; + k = parent_index; + } + + m_heap[k].m_index = index; + m_heap[k].m_priority = priority; + } + + private: + struct entry + { + uint32_t m_index; + float m_priority; + }; + + basisu::vector m_heap; + uint32_t m_size; + + // Push down entry at index + inline void down_heap(uint32_t heap_index) + { + uint32_t orig_index = m_heap[heap_index].m_index; + const float orig_priority = m_heap[heap_index].m_priority; + + uint32_t child_index; + while ((child_index = (heap_index << 1)) <= m_size) + { + if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index; + if (orig_priority > m_heap[child_index].m_priority) + break; + m_heap[heap_index] = m_heap[child_index]; + heap_index = child_index; + } + + 
// NOTE(review): this chunk was recovered from a garbled text extraction. Template
// argument lists were stripped throughout (e.g. "template" with no parameter list,
// "static_cast(...)", "basisu::vector x") — restore them from the upstream encoder
// sources before compiling. Tokens below are preserved exactly as extracted.

// Tail of a binary-heap priority queue definition that begins before this chunk:
// restores the displaced entry's index/priority after a heap sift operation.
m_heap[heap_index].m_index = orig_index;
m_heap[heap_index].m_priority = orig_priority;
	}
};

// Tree structured vector quantization (TSVQ)

// Builds a binary tree of clusters over weighted training vectors by repeatedly
// splitting the leaf with the highest variance (tracked in a priority queue).
// Leaves of the final tree form the codebook.
template
class tree_vector_quant
{
public:
	typedef TrainingVectorType training_vec_type;
	// A training vector paired with its (integer) weight.
	typedef std::pair training_vec_with_weight;
	typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs;

	tree_vector_quant() :
		m_next_codebook_index(0)
	{
	}

	void clear()
	{
		clear_vector(m_training_vecs);
		clear_vector(m_nodes);
		m_next_codebook_index = 0;
	}

	// Adds one weighted training vector to the input set.
	void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }

	size_t get_total_training_vecs() const { return m_training_vecs.size(); }
	const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
	array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }

	// Retrieves the codebook as one cluster (list of training-vector indices) per leaf.
	void retrieve(basisu::vector< basisu::vector > &codebook) const
	{
		for (uint32_t i = 0; i < m_nodes.size(); i++)
		{
			const tsvq_node &n = m_nodes[i];
			if (!n.is_leaf())
				continue;

			codebook.resize(codebook.size() + 1);
			codebook.back() = n.m_training_vecs;
		}
	}

	// Retrieves the codebook as one centroid (leaf origin) per leaf.
	void retrieve(basisu::vector &codebook) const
	{
		for (uint32_t i = 0; i < m_nodes.size(); i++)
		{
			const tsvq_node &n = m_nodes[i];
			if (!n.is_leaf())
				continue;

			codebook.resize(codebook.size() + 1);
			codebook.back() = n.m_origin;
		}
	}

	// Retrieves a codebook truncated to at most max_clusters clusters by walking the
	// tree depth-first and treating any node whose split would exceed the budget as a leaf.
	void retrieve(uint32_t max_clusters, basisu::vector &codebook) const
	{
		uint_vec node_stack;
		node_stack.reserve(512);

		codebook.resize(0);
		codebook.reserve(max_clusters);

		uint32_t node_index = 0;

		while (true)
		{
			const tsvq_node& cur = m_nodes[node_index];

			// Stop descending when the node is a leaf, or when emitting its two
			// children would exceed the requested cluster budget.
			if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
			{
				codebook.resize(codebook.size() + 1);
				codebook.back() = cur.m_training_vecs;

				if (node_stack.empty())
					break;

				node_index = node_stack.back();
				node_stack.pop_back();
				continue;
			}

			// Defer the right child, descend into the left.
			node_stack.push_back(cur.m_right_index);
			node_index = cur.m_left_index;
		}
	}

	// Builds the TSVQ tree with up to max_size leaves. Returns false if there are
	// no training vectors. Highest-variance leaves are split first.
	bool generate(uint32_t max_size)
	{
		if (!m_training_vecs.size())
			return false;

		m_next_codebook_index = 0;

		clear_vector(m_nodes);
		m_nodes.reserve(max_size * 2 + 1);

		m_nodes.push_back(prepare_root());

		priority_queue var_heap;
		var_heap.init(max_size, 0, m_nodes[0].m_var);

		basisu::vector l_children, r_children;

		// Now split the worst nodes
		l_children.reserve(m_training_vecs.size() + 1);
		r_children.reserve(m_training_vecs.size() + 1);

		uint32_t total_leaf_nodes = 1;

		//interval_timer tm;
		//tm.start();

		while ((var_heap.size()) && (total_leaf_nodes < max_size))
		{
			const uint32_t node_index = var_heap.get_top_index();
			const tsvq_node &node = m_nodes[node_index];

			assert(node.m_var == var_heap.get_top_priority());
			assert(node.is_leaf());

			var_heap.delete_top();

			if (node.m_training_vecs.size() > 1)
			{
				if (split_node(node_index, var_heap, l_children, r_children))
				{
					// This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
					total_leaf_nodes += 1;
				}
			}
		}

		//debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs());

		return true;
	}

private:
	// One tree node: centroid, total weight, variance, child links, and the indices
	// of the training vectors assigned to it.
	class tsvq_node
	{
	public:
		inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }

		// vecs is erased
		inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }

		// A node with no left child is a leaf (children are always created in pairs).
		inline bool is_leaf() const { return m_left_index < 0; }

		float m_var;
		uint64_t m_weight;
		TrainingVectorType m_origin;
		int32_t m_left_index, m_right_index;
		basisu::vector m_training_vecs;
		int m_codebook_index;
	};

	typedef basisu::vector tsvq_node_vec;
	tsvq_node_vec m_nodes;

	array_of_weighted_training_vecs m_training_vecs;

	uint32_t m_next_codebook_index;

	// Builds the root node: all training vectors, weighted centroid, and variance
	// computed as E[v.v] - (sum.sum)/weight before normalizing the centroid.
	tsvq_node prepare_root() const
	{
		double ttsum = 0.0f;

		// Prepare root node containing all training vectors
		tsvq_node root;
		root.m_training_vecs.reserve(m_training_vecs.size());

		for (uint32_t i = 0; i < m_training_vecs.size(); i++)
		{
			const TrainingVectorType &v = m_training_vecs[i].first;
			const uint64_t weight = m_training_vecs[i].second;

			root.m_training_vecs.push_back(i);

			root.m_origin += (v * static_cast(weight));
			root.m_weight += weight;

			ttsum += v.dot(v) * weight;
		}

		root.m_var = static_cast(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));

		root.m_origin *= (1.0f / root.m_weight);

		return root;
	}

	// Splits the given leaf into two children (PCA-style initial split refined by
	// k-means), pushing any splittable child back onto the variance heap.
	// Returns false if the node could not be split.
	bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector &l_children, basisu::vector &r_children)
	{
		TrainingVectorType l_child_org, r_child_org;
		uint64_t l_weight = 0, r_weight = 0;
		float l_var = 0.0f, r_var = 0.0f;

		// Compute initial left/right child origins
		if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))
			return false;

		// Use k-means iterations to refine these children vectors
		if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
			return false;

		// Create children
		const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;

		m_nodes[node_index].m_left_index = l_child_index;
		m_nodes[node_index].m_right_index = r_child_index;

		m_nodes[node_index].m_codebook_index = m_next_codebook_index;
		m_next_codebook_index++;

		m_nodes.resize(m_nodes.size() + 2);

		tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];

		l_child.set(l_child_org, l_weight, l_var, l_children);
		r_child.set(r_child_org, r_weight, r_var, r_children);

		// A zero/negative variance child that still holds distinct vectors gets a
		// tiny positive variance so it stays eligible for further splitting.
		if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))
		{
			TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);

			for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)
			{
				if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))
				{
					l_child.m_var = 1e-4f;
					break;
				}
			}
		}

		if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))
		{
			TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);

			for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)
			{
				if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))
				{
					r_child.m_var = 1e-4f;
					break;
				}
			}
		}

		if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
			var_heap.add_heap(l_child_index, l_child.m_var);

		if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
			var_heap.add_heap(r_child_index, r_child.m_var);

		return true;
	}

	// Computes the principal axis of the node's training vectors from their weighted
	// covariance matrix (optionally via an SSE4.1 path for the 16-element case).
	TrainingVectorType compute_split_axis(const tsvq_node &node) const
	{
		const uint32_t N = TrainingVectorType::num_elements;

		matrix cmatrix;

		if ((N != 16) || (!g_cpu_supports_sse41))
		{
			cmatrix.set_zero();

			// Compute covariance matrix from weighted input vectors
			for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
			{
				const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
				const TrainingVectorType w(static_cast(m_training_vecs[node.m_training_vecs[i]].second) * v);

				// Only the upper triangle is accumulated; mirrored below.
				for (uint32_t x = 0; x < N; x++)
					for (uint32_t y = x; y < N; y++)
						cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
			}
		}
		else
		{
#if BASISU_SUPPORT_SSE
			// Specialize the case with 16x16 matrices, which are quite expensive without SIMD.
			// This SSE function takes pointers to void types, so do some sanity checks.
			assert(sizeof(TrainingVectorType) == sizeof(float) * 16);
			assert(sizeof(training_vec_with_weight) == sizeof(std::pair));
			update_covar_matrix_16x16_sse41(node.m_training_vecs.size_u32(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
#endif
		}

		const float renorm_scale = 1.0f / node.m_weight;

		for (uint32_t x = 0; x < N; x++)
			for (uint32_t y = x; y < N; y++)
				cmatrix[x][y] *= renorm_scale;

		// Diagonal flip
		for (uint32_t x = 0; x < (N - 1); x++)
			for (uint32_t y = x + 1; y < N; y++)
				cmatrix[y][x] = cmatrix[x][y];

		return compute_pca_from_covar(cmatrix);
	}

	// Chooses initial left/right child centroids: signed projection onto the PCA
	// axis, with a largest-extent-axis fallback when one side comes up empty.
	bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
	{
		//const uint32_t N = TrainingVectorType::num_elements;

		// Two vectors: trivially one per side.
		if (2 == node.m_training_vecs.size())
		{
			l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
			r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
			return true;
		}

		TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
		double l_weight = 0.0f, r_weight = 0.0f;

		// Compute initial left/right children
		for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
		{
			const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;

			const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;

			double t = (v - node.m_origin).dot(axis);
			if (t >= 0.0f)
			{
				r_child += v * weight;
				r_weight += weight;
			}
			else
			{
				l_child += v * weight;
				l_weight += weight;
			}
		}

		if ((l_weight > 0.0f) && (r_weight > 0.0f))
		{
			l_child_result = l_child * static_cast(1.0f / l_weight);
			r_child_result = r_child * static_cast(1.0f / r_weight);
		}
		else
		{
			// PCA split failed to separate the set — fall back to splitting along the
			// component axis with the largest extent.
			TrainingVectorType l(1e+20f);
			TrainingVectorType h(-1e+20f);
			for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
			{
				const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;

				l = TrainingVectorType::component_min(l, v);
				h = TrainingVectorType::component_max(h, v);
			}

			TrainingVectorType r(h - l);

			float largest_axis_v = 0.0f;
			int largest_axis_index = -1;
			for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)
			{
				if (r[i] > largest_axis_v)
				{
					largest_axis_v = r[i];
					largest_axis_index = i;
				}
			}

			if (largest_axis_index < 0)
				return false;

			basisu::vector keys(node.m_training_vecs.size());
			for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
				keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];

			uint_vec indices(node.m_training_vecs.size());
			indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);

			l_child.set_zero();
			l_weight = 0;

			r_child.set_zero();
			r_weight = 0;

			// NOTE(review): `indices` produced by indirect_sort is not referenced in the
			// loop below (vectors are split by original order, not sorted order) — verify
			// against the upstream sources whether this should index via indices[i].
			const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;
			for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
			{
				const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;

				const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;

				if (i < half_index)
				{
					l_child += v * weight;
					l_weight += weight;
				}
				else
				{
					r_child += v * weight;
					r_weight += weight;
				}
			}

			if ((l_weight > 0.0f) && (r_weight > 0.0f))
			{
				l_child_result = l_child * static_cast(1.0f / l_weight);
				r_child_result = r_child * static_cast(1.0f / r_weight);
			}
			else
			{
				// Last resort: use the component-wise min/max corners as the children.
				l_child_result = l;
				r_child_result = h;
			}
		}

		return true;
	}

	// Runs up to 6 k-means (2-means) iterations to refine the two child centroids,
	// producing each child's member list, total weight, and variance. Returns false
	// only if the node is genuinely unsplittable (all weight lands on one side even
	// after the identical-vector fallback).
	bool refine_split(const tsvq_node &node,
		TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector &l_children,
		TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector &r_children) const
	{
		l_children.reserve(node.m_training_vecs.size());
		r_children.reserve(node.m_training_vecs.size());

		float prev_total_variance = 1e+10f;

		// Refine left/right children locations using k-means iterations
		const uint32_t cMaxIters = 6;
		for (uint32_t iter = 0; iter < cMaxIters; iter++)
		{
			l_children.resize(0);
			r_children.resize(0);

			TrainingVectorType new_l_child(cZero), new_r_child(cZero);

			double l_ttsum = 0.0f, r_ttsum = 0.0f;

			l_weight = 0;
			r_weight = 0;

			// Assign each vector to its nearest child centroid.
			for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
			{
				const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
				const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;

				double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);

				if (left_dist2 >= right_dist2)
				{
					new_r_child += (v * static_cast(weight));
					r_weight += weight;

					r_ttsum += weight * v.dot(v);
					r_children.push_back(node.m_training_vecs[i]);
				}
				else
				{
					new_l_child += (v * static_cast(weight));
					l_weight += weight;

					l_ttsum += weight * v.dot(v);
					l_children.push_back(node.m_training_vecs[i]);
				}
			}

			// Node is unsplittable using the above algorithm - try something else to split it up.
			if ((!l_weight) || (!r_weight))
			{
				// Fallback: vectors equal to the first vector go right, all others left.
				l_children.resize(0);
				new_l_child.set(0.0f);
				l_ttsum = 0.0f;
				l_weight = 0;

				r_children.resize(0);
				new_r_child.set(0.0f);
				r_ttsum = 0.0f;
				r_weight = 0;

				TrainingVectorType firstVec;
				firstVec.clear();
				for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
				{
					const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
					const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;

					if ((!i) || (v == firstVec))
					{
						firstVec = v;

						new_r_child += (v * static_cast(weight));
						r_weight += weight;

						r_ttsum += weight * v.dot(v);
						r_children.push_back(node.m_training_vecs[i]);
					}
					else
					{
						new_l_child += (v * static_cast(weight));
						l_weight += weight;

						l_ttsum += weight * v.dot(v);
						l_children.push_back(node.m_training_vecs[i]);
					}
				}

				if ((!l_weight) || (!r_weight))
					return false;
			}

			l_var = static_cast(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
			r_var = static_cast(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));

			new_l_child *= (1.0f / l_weight);
			new_r_child *= (1.0f / r_weight);

			l_child = new_l_child;
			r_child = new_r_child;

			float total_var = l_var + r_var;
			const float cGiveupVariance = .00001f;
			if (total_var < cGiveupVariance)
				break;

			// Check to see if the variance has settled
			const float cVarianceDeltaThresh = .00125f;
			if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
				break;

			prev_total_variance = total_var;
		}

		return true;
	}
};

// A group of training-vector indices that share one value, plus their summed weight.
struct weighted_block_group
{
	uint64_t m_total_weight;
	uint_vec m_indices;
};

// Multi-threaded hierarchical codebook generation: splits the training set across
// worker quantizers via an initial coarse clustering, then merges per-thread results.
// (Continues on the following lines; this line intentionally ends mid-statement.)
template
bool generate_hierarchical_codebook_threaded_internal(Quantizer& q,
	uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
	basisu::vector& codebook,
	basisu::vector& parent_codebook,
	uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool)
{
	codebook.resize(0);
	parent_codebook.resize(0);

	if
	// (Continuation of generate_hierarchical_codebook_threaded_internal — the `if`
	// keyword is on the preceding line.)
	// Single-threaded fallback: small inputs or tiny codebooks are not worth threading.
	((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16))
	{
		if (!q.generate(max_codebook_size))
			return false;

		q.retrieve(codebook);

		if (max_parent_codebook_size)
			q.retrieve(max_parent_codebook_size, parent_codebook);

		return true;
	}

	const uint32_t cMaxThreads = 16;
	if (max_threads > cMaxThreads)
		max_threads = cMaxThreads;

	// First pass: coarse clustering into (up to) one cluster per worker thread.
	if (!q.generate(max_threads))
		return false;

	basisu::vector initial_codebook;

	q.retrieve(initial_codebook);

	// Fewer coarse clusters than threads — the coarse result is already final.
	if (initial_codebook.size() < max_threads)
	{
		codebook = initial_codebook;

		if (max_parent_codebook_size)
			q.retrieve(max_parent_codebook_size, parent_codebook);

		return true;
	}

	Quantizer quantizers[cMaxThreads];

	bool success_flags[cMaxThreads];
	clear_obj(success_flags);

	basisu::vector local_clusters[cMaxThreads];
	basisu::vector local_parent_clusters[cMaxThreads];

	// Second pass: each job quantizes one coarse cluster independently, then maps
	// its local training-vector indices back to global indices.
	for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
	{
		pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {

			Quantizer& lq = quantizers[thread_iter];
			uint_vec& cluster_indices = initial_codebook[thread_iter];

			uint_vec local_to_global(cluster_indices.size());

			for (uint32_t i = 0; i < cluster_indices.size(); i++)
			{
				const uint32_t global_training_vec_index = cluster_indices[i];
				local_to_global[i] = global_training_vec_index;

				lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second);
			}

			// Each worker gets an even share of the codebook budget when limiting.
			const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs();

			success_flags[thread_iter] = lq.generate(max_clusters);

			if (success_flags[thread_iter])
			{
				lq.retrieve(local_clusters[thread_iter]);

				for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++)
				{
					for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++)
						local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]];
				}

				if (max_parent_codebook_size)
				{
					lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]);

					for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++)
					{
						for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++)
							local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]];
					}
				}
			}

		} );

	} // thread_iter

	pJob_pool->wait_for_all();

	uint32_t total_clusters = 0, total_parent_clusters = 0;

	for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++)
	{
		if (!success_flags[thread_iter])
			return false;
		total_clusters += (uint32_t)local_clusters[thread_iter].size();
		total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size();
	}

	codebook.reserve(total_clusters);
	parent_codebook.reserve(total_parent_clusters);

	// Merge per-thread cluster lists (swap avoids copying each cluster's contents).
	for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
	{
		for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++)
		{
			codebook.resize(codebook.size() + 1);
			codebook.back().swap(local_clusters[thread_iter][j]);
		}

		for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++)
		{
			parent_codebook.resize(parent_codebook.size() + 1);
			parent_codebook.back().swap(local_parent_clusters[thread_iter][j]);
		}
	}

	return true;
}

// Public entry point: deduplicates identical training vectors into weighted groups,
// quantizes the unique set (threaded when large enough), then expands group indices
// back to the original training-vector indices.
// NOTE(review): template parameter lists were stripped by extraction here as well.
template
bool generate_hierarchical_codebook_threaded(Quantizer& q,
	uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
	basisu::vector& codebook,
	basisu::vector& parent_codebook,
	uint32_t max_threads, job_pool *pJob_pool,
	bool even_odd_input_pairs_equal)
{
	//typedef bit_hasher training_vec_bit_hasher;

	// rg 6/24/2025 - Cross platform determinism
#if 0
	typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,
		training_vec_bit_hasher> group_hash;
#else
	// std::map iterates in key order, giving identical results on every platform.
	typedef std::map< typename Quantizer::training_vec_type, weighted_block_group > group_hash;
#endif

	//interval_timer tm;
	//tm.start();

	group_hash unique_vecs;

	// rg 6/24/2025 - Cross platform determinism
#if 0
	unique_vecs.reserve(20000);
#endif

	weighted_block_group g;

	if (even_odd_input_pairs_equal)
	{
		// Inputs come in identical even/odd pairs: group two indices at a time.
		g.m_indices.resize(2);

		assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0);

		for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2)
		{
			assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first);

			g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second;
			g.m_indices[0] = i;
			g.m_indices[1] = i + 1;

			auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));

			if (!ins_res.second)
			{
				// Already present: accumulate weight and append both indices.
				(ins_res.first)->second.m_total_weight += g.m_total_weight;
				(ins_res.first)->second.m_indices.push_back(i);
				(ins_res.first)->second.m_indices.push_back(i + 1);
			}
		}
	}
	else
	{
		g.m_indices.resize(1);

		for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)
		{
			g.m_total_weight = q.get_training_vecs()[i].second;
			g.m_indices[0] = i;

			auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));

			if (!ins_res.second)
			{
				(ins_res.first)->second.m_total_weight += g.m_total_weight;
				(ins_res.first)->second.m_indices.push_back(i);
			}
		}
	}

	//debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs());
	debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size());

	Quantizer group_quant;
	typedef typename group_hash::const_iterator group_hash_const_iter;
	basisu::vector unique_vec_iters;
	unique_vec_iters.reserve(unique_vecs.size());

	// Feed one weighted representative per unique vector to the group quantizer,
	// remembering each map iterator so group indices can be expanded later.
	for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)
	{
		group_quant.add_training_vec(iter->first, iter->second.m_total_weight);
		unique_vec_iters.push_back(iter);
	}

	bool limit_clusterizers = true;
	if (unique_vecs.size() <= max_codebook_size)
		limit_clusterizers = false;

	debug_printf("Limit clusterizers: %u\n", limit_clusterizers);

	basisu::vector group_codebook, group_parent_codebook;
	bool status = generate_hierarchical_codebook_threaded_internal(group_quant,
		max_codebook_size, max_parent_codebook_size,
		group_codebook,
		group_parent_codebook,
		(unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);

	if (!status)
		return false;

	// Expand each group index back into the original training-vector indices.
	codebook.resize(0);
	for (uint32_t i = 0; i < group_codebook.size(); i++)
	{
		codebook.resize(codebook.size() + 1);

		for (uint32_t j = 0; j < group_codebook[i].size(); j++)
		{
			const uint32_t group_index = group_codebook[i][j];

			typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
			const uint_vec& training_vec_indices = group_iter->second.m_indices;

			append_vector(codebook.back(), training_vec_indices);
		}
	}

	parent_codebook.resize(0);
	for (uint32_t i = 0; i < group_parent_codebook.size(); i++)
	{
		parent_codebook.resize(parent_codebook.size() + 1);

		for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)
		{
			const uint32_t group_index = group_parent_codebook[i][j];

			typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
			const uint_vec& training_vec_indices = group_iter->second.m_indices;

			append_vector(parent_codebook.back(), training_vec_indices);
		}
	}

	return true;
}

// Canonical Huffman coding

// Symbol frequency histogram used to drive Huffman table construction.
class histogram
{
	basisu::vector m_hist;

public:
	histogram(uint32_t size = 0) { init(size); }

	void clear()
	{
		clear_vector(m_hist);
	}

	// Resizes to `size` counters, all zeroed (resize(0) first forces reinit).
	void init(uint32_t size)
	{
		m_hist.resize(0);
		m_hist.resize(size);
	}

	inline uint32_t size() const { return static_cast(m_hist.size()); }

	inline const uint32_t &operator[] (uint32_t index) const
	{
		return m_hist[index];
	}

	inline uint32_t &operator[] (uint32_t index)
	{
		return m_hist[index];
	}

	inline void inc(uint32_t index)
	{
		m_hist[index]++;
	}

	uint64_t get_total() const
	{
		uint64_t total = 0;
		for (uint32_t i = 0; i < m_hist.size(); ++i)
			total += m_hist[i];
		return total;
	}

	// Shannon entropy of the histogram in bits (continues on the following line).
	double get_entropy() const
	{
		double total = static_cast(get_total());
		if (total == 0.0f)
			return 0.0f;

		const double inv_total = 1.0f / total;
		const double neg_inv_log2 = -1.0f /
		// (Continuation of histogram::get_entropy — converts natural log to log2.)
		log(2.0f);

		double e = 0.0f;
		for (uint32_t i = 0; i < m_hist.size(); i++)
			if (m_hist[i])
				e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast(m_hist[i]);

		return e;
	}
};

// One symbol's frequency key and original index, used by the radix sort below.
struct sym_freq
{
	uint32_t m_key;
	uint16_t m_sym_index;
};

// Helpers for canonical Huffman construction: frequency sort, code-length
// computation, and maximum-code-size enforcement.
sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);

// Per-symbol canonical Huffman codes and code sizes built from a histogram.
class huffman_encoding_table
{
public:
	huffman_encoding_table()
	{
	}

	void clear()
	{
		clear_vector(m_codes);
		clear_vector(m_code_sizes);
	}

	bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)
	{
		return init(h.size(), &h[0], max_code_size);
	}

	bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);
	bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);

	inline const uint16_vec &get_codes() const { return m_codes; }
	inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }

	// Index of the highest symbol with a non-zero code size, plus one.
	uint32_t get_total_used_codes() const
	{
		for (int i = static_cast(m_code_sizes.size()) - 1; i >= 0; i--)
			if (m_code_sizes[i])
				return i + 1;
		return 0;
	}

private:
	uint16_vec m_codes;
	uint8_vec m_code_sizes;
};

// LSB-first bit packer that accumulates bits into a byte vector, with helpers for
// Huffman codes, truncated binary, Rice, and chunked VLC encodings.
class bitwise_coder
{
public:
	bitwise_coder() :
		m_bit_buffer(0),
		m_bit_buffer_size(0),
		m_total_bits(0)
	{
	}

	bitwise_coder(const bitwise_coder& other) :
		m_bytes(other.m_bytes),
		m_bit_buffer(other.m_bit_buffer),
		m_bit_buffer_size(other.m_bit_buffer_size),
		m_total_bits(other.m_total_bits)
	{
	}

	bitwise_coder(bitwise_coder&& other) :
		m_bytes(std::move(other.m_bytes)),
		m_bit_buffer(other.m_bit_buffer),
		m_bit_buffer_size(other.m_bit_buffer_size),
		m_total_bits(other.m_total_bits)
	{
	}

	bitwise_coder& operator= (const bitwise_coder& rhs)
	{
		if (this == &rhs)
			return *this;

		m_bytes = rhs.m_bytes;
		m_bit_buffer = rhs.m_bit_buffer;
		m_bit_buffer_size = rhs.m_bit_buffer_size;
		m_total_bits = rhs.m_total_bits;

		return *this;
	}

	bitwise_coder& operator= (bitwise_coder&& rhs)
	{
		if (this == &rhs)
			return *this;

		m_bytes = std::move(rhs.m_bytes);
		m_bit_buffer = rhs.m_bit_buffer;
		m_bit_buffer_size = rhs.m_bit_buffer_size;
		m_total_bits = rhs.m_total_bits;

		return *this;
	}

	// Frees the byte buffer and resets all state.
	inline void clear()
	{
		clear_vector(m_bytes);
		m_bit_buffer = 0;
		m_bit_buffer_size = 0;
		m_total_bits = 0;
	}

	// Resets state but keeps the byte buffer's capacity.
	inline void restart()
	{
		m_bytes.resize(0);
		m_bit_buffer = 0;
		m_bit_buffer_size = 0;
		m_total_bits = 0;
	}

	inline const uint8_vec &get_bytes() const { return m_bytes; }
	inline uint8_vec& get_bytes() { return m_bytes; }

	inline void reserve(uint32_t size) { m_bytes.reserve(size); }

	inline uint64_t get_total_bits() const { return m_total_bits; }
	inline uint32_t get_total_bits_u32() const { assert(m_total_bits <= UINT32_MAX); return static_cast(m_total_bits); }
	inline void clear_total_bits() { m_total_bits = 0; }

	inline void init(uint32_t reserve_size = 1024)
	{
		m_bytes.reserve(reserve_size);
		m_bytes.resize(0);

		m_bit_buffer = 0;
		m_bit_buffer_size = 0;
		m_total_bits = 0;
	}

	// Pads the partial byte (if any) out to a byte boundary and emits it.
	// Returns 8 if a byte was emitted, 0 otherwise.
	inline uint32_t flush()
	{
		if (m_bit_buffer_size)
		{
			// Account for the zero padding bits added to reach the byte boundary.
			m_total_bits += 8 - (m_bit_buffer_size & 7);
			append_byte(static_cast(m_bit_buffer));

			m_bit_buffer = 0;
			m_bit_buffer_size = 0;

			return 8;
		}

		return 0;
	}

	// Appends num_bits (<= 32) of `bits`, least-significant-bit first.
	inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)
	{
		assert(num_bits <= 32);
		assert(bits < (1ULL << num_bits));

		if (!num_bits)
			return 0;

		m_total_bits += num_bits;

		uint64_t v = (static_cast(bits) << m_bit_buffer_size) | m_bit_buffer;
		m_bit_buffer_size += num_bits;

		while (m_bit_buffer_size >= 8)
		{
			append_byte(static_cast(v));
			v >>= 8;
			m_bit_buffer_size -= 8;
		}

		m_bit_buffer = static_cast(v);
		return num_bits;
	}

	// Emits the Huffman code for `sym` from the given table; returns its size in bits.
	inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)
	{
		uint32_t code = tab.get_codes()[sym];
		uint32_t code_size = tab.get_code_sizes()[sym];
		assert(code_size >= 1);
		put_bits(code, code_size);
		return code_size;
	}

	// Truncated binary code for v in [0, n): k or k+1 bits, where k = floor(log2(n)).
	inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)
	{
		assert((n >= 2) && (v < n));

		uint32_t k = floor_log2i(n);
		uint32_t u = (1 << (k + 1)) - n;

		if (v < u)
			return put_bits(v, k);

		uint32_t x = v + u;
		assert((x >> 1) >= u);

		put_bits(x >> 1, k);
		put_bits(x & 1, 1);
		return k + 1;
	}

	// Rice code with parameter m: unary quotient followed by the remainder bits.
	inline uint32_t put_rice(uint32_t v, uint32_t m)
	{
		assert(m);

		const uint64_t start_bits = m_total_bits;

		uint32_t q = v >> m, r = v & ((1 << m) - 1);

		// rice coding sanity check
		assert(q <= 64);

		for (; q > 16; q -= 16)
			put_bits(0xFFFF, 16);

		put_bits((1 << q) - 1, q);
		put_bits(r << 1, m + 1);

		return (uint32_t)(m_total_bits - start_bits);
	}

	// Variable-length code emitted in chunk_bits-sized chunks; each chunk carries a
	// continuation bit in its top position.
	inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)
	{
		assert(chunk_bits);

		const uint32_t chunk_size = 1 << chunk_bits;
		const uint32_t chunk_mask = chunk_size - 1;

		uint32_t total_bits = 0;

		for ( ; ; )
		{
			uint32_t next_v = v >> chunk_bits;

			total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);
			if (!next_v)
				break;

			v = next_v;
		}

		return total_bits;
	}

	uint32_t emit_huffman_table(const huffman_encoding_table &tab);

	// Appends another coder's output bit-exactly, including its partial byte.
	void append(const bitwise_coder& other)
	{
		for (uint32_t i = 0; i < other.m_bytes.size(); i++)
			put_bits(other.m_bytes[i], 8);

		if (other.m_bit_buffer_size)
			put_bits(other.m_bit_buffer, other.m_bit_buffer_size);
	}

private:
	uint8_vec m_bytes;
	uint32_t m_bit_buffer, m_bit_buffer_size;
	uint64_t m_total_bits;

	inline void append_byte(uint8_t c)
	{
		//m_bytes.resize(m_bytes.size() + 1);
		//m_bytes.back() = c;

		m_bytes.push_back(c);
	}

	static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
	static void end_zero_run(uint16_vec &syms, uint32_t &run_size);
};

// Huffman-codes groups of small fixed-width symbols: several symbols are packed
// into one group value, histogrammed, and each whole group is emitted as one code.
class huff2D
{
public:
	huff2D() { }
	huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); }

	inline const histogram &get_histogram() const { return m_histogram; }
	inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; }

	inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group)
	{
		// Group values must fit in 16 bits (the histogram has 2^(bits*count) bins).
		assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1);

		m_bits_per_sym = bits_per_sym;
		m_total_syms_per_group = total_syms_per_group;
		m_cur_sym_bits = 0;
		m_cur_num_syms = 0;
		m_decode_syms_remaining = 0;
		m_next_decoder_group_index = 0;

		m_histogram.init(1 << (bits_per_sym * total_syms_per_group));
	}

	inline void clear()
	{
		m_group_bits.clear();

		m_cur_sym_bits = 0;
		m_cur_num_syms = 0;
		m_decode_syms_remaining = 0;
		m_next_decoder_group_index = 0;
	}

	// Packs one symbol into the current group; flushes when the group is full.
	inline void emit(uint32_t sym)
	{
		m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym));
		m_cur_num_syms++;

		if (m_cur_num_syms == m_total_syms_per_group)
			flush();
	}

	// Records the current (possibly partial) group and its histogram count.
	inline void flush()
	{
		if (m_cur_num_syms)
		{
			m_group_bits.push_back(m_cur_sym_bits);
			m_histogram.inc(m_cur_sym_bits);

			m_cur_sym_bits = 0;
			m_cur_num_syms = 0;
		}
	}

	// Builds the Huffman table from the accumulated histogram and rewinds for emission.
	inline bool start_encoding(uint32_t code_size_limit = 16)
	{
		flush();

		if (!m_encoding_table.init(m_histogram, code_size_limit))
			return false;

		m_decode_syms_remaining = 0;
		m_next_decoder_group_index = 0;

		return true;
	}

	// Emits the next group's code the first time a symbol of that group is reached;
	// returns the bits written (0 for symbols already covered by the group's code).
	inline uint32_t emit_next_sym(bitwise_coder &c)
	{
		uint32_t bits = 0;

		if (!m_decode_syms_remaining)
		{
			bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table);
			m_decode_syms_remaining = m_total_syms_per_group;
		}

		m_decode_syms_remaining--;
		return bits;
	}

	inline void emit_flush()
	{
		m_decode_syms_remaining = 0;
	}

private:
	uint_vec m_group_bits;
	huffman_encoding_table m_encoding_table;
	histogram m_histogram;
	uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining;
};

bool huffman_test(int rand_seed);

// VQ index reordering

// Reorders palette/codebook entries so frequently co-occurring entries get nearby
// indices, using a co-occurrence histogram plus an optional entry-distance callback.
class palette_index_reorderer
{
public:
	palette_index_reorderer()
	{
	}

	void clear()
	{
		clear_vector(m_hist);
		clear_vector(m_total_count_to_picked);
		clear_vector(m_entries_picked);
		clear_vector(m_entries_to_do);
		clear_vector(m_remap_table);
	}

	// returns [0,1] distance of entry i to entry j
	typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);

	void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);

	// Table remaps old to new symbol indices
	inline const uint_vec &get_remap_table() const { return m_remap_table; }

private:
	uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;

	// Upper-triangular co-occurrence lookup (continues on the following line).
	inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ?
	// (Continuation of palette_index_reorderer::get_hist — the `?` is on the preceding line.)
	m_hist[j * n + i] : m_hist[i * n + j]; }
	// Increments the co-occurrence count for the ordered pair (i, j); only the upper
	// triangle (i < j) is stored, and -1 sentinels are ignored.
	inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }

	void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);
	void find_initial(uint32_t num_syms);
	void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
	float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
};

// Simple 32-bit 2D image class

// RGBA8 image with separate pitch (row stride in pixels). NOTE(review): this class
// continues beyond the end of this chunk; the visible portion ends mid-class.
class image
{
public:
	image() :
		m_width(0), m_height(0), m_pitch(0)
	{
	}

	image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
		m_width(0), m_height(0), m_pitch(0)
	{
		resize(w, h, p);
	}

	image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) :
		m_width(0), m_height(0), m_pitch(0)
	{
		init(pImage, width, height, comps);
	}

	image(const image &other) :
		m_width(0), m_height(0), m_pitch(0)
	{
		*this = other;
	}

	image(image&& other) :
		m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch),
		m_pixels(std::move(other.m_pixels))
	{
		other.m_width = 0;
		other.m_height = 0;
		other.m_pitch = 0;
	}

	image& operator= (image&& rhs)
	{
		if (this != &rhs)
		{
			m_width = rhs.m_width;
			m_height = rhs.m_height;
			m_pitch = rhs.m_pitch;
			m_pixels = std::move(rhs.m_pixels);

			rhs.m_width = 0;
			rhs.m_height = 0;
			rhs.m_pitch = 0;
		}
		return *this;
	}

	image &swap(image &other)
	{
		std::swap(m_width, other.m_width);
		std::swap(m_height, other.m_height);
		std::swap(m_pitch, other.m_pitch);
		m_pixels.swap(other.m_pixels);
		return *this;
	}

	image &operator= (const image &rhs)
	{
		if (this != &rhs)
		{
			m_width = rhs.m_width;
			m_height = rhs.m_height;
			m_pitch = rhs.m_pitch;
			m_pixels = rhs.m_pixels;
		}
		return *this;
	}

	image &clear()
	{
		m_width = 0;
		m_height = 0;
		m_pitch = 0;
		clear_vector(m_pixels);
		return *this;
	}

	image& match_dimensions(const image& other)
	{
		resize(other.get_width(), other.get_height());
		return *this;
	}

	// Resize preserves the overlapping region; new area is filled with `background`.
	image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)
	{
		return crop(w, h, p, background);
	}

	image &set_all(const color_rgba &c)
	{
		for (uint32_t i = 0; i < m_pixels.size(); i++)
			m_pixels[i] = c;
		return *this;
	}

	// Initializes from a packed 1-4 component byte buffer: 1 = grayscale, 2 =
	// grayscale+alpha, 3 = RGB (opaque), 4 = RGBA.
	void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps)
	{
		assert(comps >= 1 && comps <= 4);

		resize(width, height);

		for (uint32_t y = 0; y < height; y++)
		{
			for (uint32_t x = 0; x < width; x++)
			{
				const uint8_t *pSrc = &pImage[(x + y * width) * comps];
				color_rgba &dst = (*this)(x, y);

				if (comps == 1)
				{
					dst.r = pSrc[0];
					dst.g = pSrc[0];
					dst.b = pSrc[0];
					dst.a = 255;
				}
				else if (comps == 2)
				{
					dst.r = pSrc[0];
					dst.g = pSrc[0];
					dst.b = pSrc[0];
					dst.a = pSrc[1];
				}
				else
				{
					dst.r = pSrc[0];
					dst.g = pSrc[1];
					dst.b = pSrc[2];
					if (comps == 4)
						dst.a = pSrc[3];
					else
						dst.a = 255;
				}
			}
		}
	}

	image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c)
	{
		assert((int)w >= 0);
		assert((int)h >= 0);

		for (uint32_t iy = 0; iy < h; iy++)
			for (uint32_t ix = 0; ix < w; ix++)
				set_clipped(x + ix, y + iy, c);
		return *this;
	}

	// Like fill_box, but writes only the alpha channel.
	image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c)
	{
		assert((int)w >= 0);
		assert((int)h >= 0);

		for (uint32_t iy = 0; iy < h; iy++)
			for (uint32_t ix = 0; ix < w; ix++)
				set_clipped_alpha(x + ix, y + iy, c);
		return *this;
	}

	// Crops/expands; any newly exposed area is filled by replicating the old edges.
	image &crop_dup_borders(uint32_t w, uint32_t h)
	{
		const uint32_t orig_w = m_width, orig_h = m_height;

		crop(w, h);

		if (orig_w && orig_h)
		{
			if (m_width > orig_w)
			{
				for (uint32_t x = orig_w; x < m_width; x++)
					for (uint32_t y = 0; y < m_height; y++)
						set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
			}

			if (m_height > orig_h)
			{
				for (uint32_t y = orig_h; y < m_height; y++)
					for (uint32_t x = 0; x < m_width; x++)
						set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
			}
		}
		return *this;
	}

	// pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer).
	image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX)
	{
		if (p == UINT32_MAX)
			p = w;

		clear();

		if ((!p) || (!w) || (!h))
			return *this;

		m_pixels.grant_ownership(pPixels, p * h, p * h);

		m_width = w;
		m_height = h;
		m_pitch = p;

		return *this;
	}

	// Reshapes to w x h with pitch p; preserves the overlapping region, fills the
	// rest with `background` (unless init_image is false).
	image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true)
	{
		if (p == UINT32_MAX)
			p = w;

		if ((w == m_width) && (m_height == h) && (m_pitch == p))
			return *this;

		if ((!w) || (!h) || (!p))
		{
			clear();
			return *this;
		}

		color_rgba_vec cur_state;
		cur_state.swap(m_pixels);

		m_pixels.resize(p * h);

		if (init_image)
		{
			if (m_width || m_height)
			{
				for (uint32_t y = 0; y < h; y++)
				{
					for (uint32_t x = 0; x < w; x++)
					{
						if ((x < m_width) && (y < m_height))
							m_pixels[x + y * p] = cur_state[x + y * m_pitch];
						else
							m_pixels[x + y * p] = background;
					}
				}
			}
			else
			{
				m_pixels.set_all(background);
			}
		}

		m_width = w;
		m_height = h;
		m_pitch = p;

		return *this;
	}

	inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
	inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }

	// Clamped accessors: out-of-range coordinates snap to the nearest edge.
	inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); }
	inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); }

	// Per-axis wrap (positive modulo) or clamp addressing.
	inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
	{
		x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1);
		y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1);
		return m_pixels[x + y * m_pitch];
	}

	inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
	{
		x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1);
		y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1);
		return m_pixels[x + y * m_pitch];
	}

	// Silently ignores out-of-bounds writes.
	inline image &set_clipped(int x, int y, const color_rgba &c)
	{
		if ((static_cast(x) < m_width) && (static_cast(y) < m_height))
			(*this)(x, y) = c;
		return *this;
	}

	inline image& set_clipped_alpha(int x, int y, const color_rgba& c)
	{
		if ((static_cast(x) < m_width) && (static_cast(y) < m_height))
			(*this)(x, y).m_comps[3] = c.m_comps[3];
		return *this;
	}

	// Very straightforward blit with full clipping. Not fast, but it works.
+ image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_width()) + break; + + set_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + if (((src_x + w) > m_width) || ((src_y + h) > m_height)) + { + // Slower clamping case + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + } + else + { + const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch]; + + for (uint32_t y = 0; y < h; y++) + { + memcpy(pDst, pSrc, w * sizeof(color_rgba)); + pSrc += m_pitch; + pDst += w; + } + } + + return *this; + } + + image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline bool is_valid() const { return m_width > 0; } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const color_rgba_vec &get_pixels() const { return m_pixels; } + inline color_rgba_vec &get_pixels() { return 
m_pixels; } + + inline const color_rgba *get_ptr() const { return &m_pixels[0]; } + inline color_rgba *get_ptr() { return &m_pixels[0]; } + + bool has_alpha(uint32_t channel = 3) const + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + if ((*this)(x, y)[channel] < 255) + return true; + + return false; + } + + image &set_alpha(uint8_t a) + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + (*this)(x, y).a = a; + return *this; + } + + image &flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + return *this; + } + + // TODO: There are many ways to do this, not sure this is the best way. + image &renormalize_normal_map() + { + for (uint32_t y = 0; y < m_height; y++) + { + for (uint32_t x = 0; x < m_width; x++) + { + color_rgba &c = (*this)(x, y); + if ((c.r == 128) && (c.g == 128) && (c.b == 128)) + continue; + + vec3F v(c.r, c.g, c.b); + v = (v * (2.0f / 255.0f)) - vec3F(1.0f); + v.clamp(-1.0f, 1.0f); + + float length = v.length(); + const float cValidThresh = .077f; + if (length < cValidThresh) + { + c.set(128, 128, 128, c.a); + } + else if (fabs(length - 1.0f) > cValidThresh) + { + if (length) + v /= length; + + for (uint32_t i = 0; i < 3; i++) + c[i] = static_cast(clamp(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f)); + + if ((c.g == 128) && (c.r == 128)) + { + if (c.b < 128) + c.b = 0; + else + c.b = 255; + } + } + } + } + return *this; + } + + void swap_rb() + { + for (auto& v : m_pixels) + std::swap(v.r, v.b); + } + + void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); + + // bilinear filtering + vec4F get_filtered_vec4F(float x, float y) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floorf(x); + int iy = (int)floorf(y); + float wx = x - ix; + float wy = y 
- iy; + + color_rgba a(get_clamped(ix, iy)); + color_rgba b(get_clamped(ix + 1, iy)); + color_rgba c(get_clamped(ix, iy + 1)); + color_rgba d(get_clamped(ix + 1, iy + 1)); + + vec4F result; + + for (uint32_t i = 0; i < 4; i++) + { + const float top = lerp((float)a[i], (float)b[i], wx); + const float bot = lerp((float)c[i], (float)d[i], wx); + const float m = lerp((float)top, (float)bot, wy); + + result[i] = m; + } + + return result; + } + + // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. Clamp addressing. + color_rgba get_filtered(float x, float y) const + { + const vec4F fresult(get_filtered_vec4F(x, y)); + + color_rgba result; + + for (uint32_t i = 0; i < 4; i++) + result[i] = (uint8_t)clamp((int)(fresult[i] + .5f), 0, 255); + + return result; + } + + private: + uint32_t m_width, m_height, m_pitch; // all in pixels + color_rgba_vec m_pixels; + }; + + void draw_line(image& dst, int xs, int ys, int xe, int ye, const color_rgba& color); + void draw_circle(image& dst, int cx, int cy, int r, const color_rgba& color); + + inline bool is_solid_block(uint32_t n, const color_rgba* pPixels) + { + assert(n); + + if (n <= 1) + return true; + + const color_rgba c(pPixels[0]); + + for (uint32_t i = 1; i < n; i++) + if (c != pPixels[i]) + return false; + + return true; + } + + inline bool is_alpha_block(uint32_t n, const color_rgba* pPixels) + { + assert(n); + + for (uint32_t i = 0; i < n; i++) + if (pPixels[i][3] != 255) + return true; + + return false; + } + + // Float images + + typedef basisu::vector vec4F_vec; + + class imagef + { + public: + imagef() : + m_width(0), m_height(0), m_pitch(0) + { + } + + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + imagef(const imagef &other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + imagef(imagef&& other) : + m_width(other.m_width), m_height(other.m_height), 
m_pitch(other.m_pitch), + m_pixels(std::move(other.m_pixels)) + { + other.m_width = 0; + other.m_height = 0; + other.m_pitch = 0; + } + + imagef& operator= (imagef&& rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = std::move(rhs.m_pixels); + + rhs.m_width = 0; + rhs.m_height = 0; + rhs.m_pitch = 0; + } + return *this; + } + + imagef &swap(imagef &other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + imagef &operator= (const imagef &rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + imagef &clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + clear_vector(m_pixels); + return *this; + } + + imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0)) + { + const uint32_t width = src.get_width(); + const uint32_t height = src.get_height(); + + resize(width, height); + + for (int y = 0; y < (int)height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_rgba &src_pixel = src(x, y); + (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); + } + } + + return *this; + } + + imagef& match_dimensions(const imagef& other) + { + resize(other.get_width(), other.get_height()); + return *this; + } + + imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) + { + return resize(other.get_width(), other.get_height(), p, background); + } + + imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) + { + return crop(w, h, p, background); + } + + imagef &set_all(const vec4F &c) + { + for (uint32_t i = 0; 
i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped(x + ix, y + iy, c); + return *this; + } + + imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + vec4F_vec cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + imagef& crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + } + return *this; + } + + inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline vec4F &get_clamped(int x, int y) { return (*this)(clamp(x, 
0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline imagef &set_clipped(int x, int y, const vec4F &c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. + imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_width()) + break; + + set_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + return *this; + } + + imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline bool is_valid() const { return m_width > 0; } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return 
m_pitch; } + inline uint64_t get_total_pixels() const { return (uint64_t)m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const vec4F_vec &get_pixels() const { return m_pixels; } + inline vec4F_vec &get_pixels() { return m_pixels; } + + inline const vec4F *get_ptr() const { return &m_pixels[0]; } + inline vec4F *get_ptr() { return &m_pixels[0]; } + + bool clean_astc_hdr_pixels(float highest_mag) + { + bool status = true; + bool nan_msg = false; + bool inf_msg = false; + bool neg_zero_msg = false; + bool neg_msg = false; + bool clamp_msg = false; + + for (uint32_t iy = 0; iy < m_height; iy++) + { + for (uint32_t ix = 0; ix < m_width; ix++) + { + vec4F& c = (*this)(ix, iy); + + for (uint32_t s = 0; s < 4; s++) + { + float &p = c[s]; + union { float f; uint32_t u; } x; x.f = p; + + if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000)) + { + if (std::isnan(p)) + { + if (!nan_msg) + { + fprintf(stderr, "One or more input pixels was NaN, setting to 0.\n"); + nan_msg = true; + } + } + + if (std::isinf(p)) + { + if (!inf_msg) + { + fprintf(stderr, "One or more input pixels was INF, setting to 0.\n"); + inf_msg = true; + } + } + + if (x.u == 0x80000000) + { + if (!neg_zero_msg) + { + fprintf(stderr, "One or more input pixels was -0, setting them to 0.\n"); + neg_zero_msg = true; + } + } + + p = 0.0f; + status = false; + } + else + { + //const float o = p; + if (p < 0.0f) + { + p = 0.0f; + + if (!neg_msg) + { + fprintf(stderr, "One or more input pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n"); + neg_msg = true; + } + + status = false; + } + + if (p > highest_mag) + { + p = highest_mag; + + if (!clamp_msg) + { + fprintf(stderr, 
"One or more input pixels had to be clamped to %f.\n", highest_mag); + clamp_msg = true; + } + + status = false; + } + } + } + } + } + + return status; + } + + imagef& flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + + return *this; + } + + bool has_alpha(uint32_t channel = 3) const + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + if ((*this)(x, y)[channel] != 1.0f) + return true; + + return false; + } + + vec4F get_filtered_vec4F(float x, float y) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floorf(x); + int iy = (int)floorf(y); + float wx = x - ix; + float wy = y - iy; + + vec4F a(get_clamped(ix, iy)); + vec4F b(get_clamped(ix + 1, iy)); + vec4F c(get_clamped(ix, iy + 1)); + vec4F d(get_clamped(ix + 1, iy + 1)); + + vec4F result; + + for (uint32_t i = 0; i < 4; i++) + { + const float top = lerp((float)a[i], (float)b[i], wx); + const float bot = lerp((float)c[i], (float)d[i], wx); + const float m = lerp((float)top, (float)bot, wy); + + result[i] = m; + } + + return result; + } + + private: + uint32_t m_width, m_height, m_pitch; // all in pixels + vec4F_vec m_pixels; + }; + + // REC 709 coefficients + const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f; + + inline float get_luminance(const vec4F &c) + { + return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B; + } + + float linear_to_srgb(float l); + float srgb_to_linear(float s); + + class fast_linear_to_srgb + { + public: + fast_linear_to_srgb() + { + init(); + } + + void init() + { + for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i) + { + float l = (float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1)); + m_linear_to_srgb_table[i] = (uint8_t)basisu::fast_floorf_int(255.0f * basisu::linear_to_srgb(l)); + } + + float srgb_to_linear[256]; + for (int i = 0; i < 256; i++) + srgb_to_linear[i] = basisu::srgb_to_linear((float)i / 
255.0f); + + for (int i = 0; i < 256; i++) + m_srgb_to_linear_thresh[i] = (srgb_to_linear[i] + srgb_to_linear[basisu::minimum(i + 1, 255)]) * .5f; + } + + inline uint8_t convert(float l) const + { + assert((l >= 0.0f) && (l <= 1.0f)); + int j = basisu::fast_roundf_int((LINEAR_TO_SRGB_TABLE_SIZE - 1) * l); + + assert((j >= 0) && (j < LINEAR_TO_SRGB_TABLE_SIZE)); + int b = m_linear_to_srgb_table[j]; + + b += (l > m_srgb_to_linear_thresh[b]); + + return (uint8_t)b; + } + + private: + static constexpr int LINEAR_TO_SRGB_TABLE_SIZE = 2048; + uint8_t m_linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE]; + + float m_srgb_to_linear_thresh[256]; + }; + + extern fast_linear_to_srgb g_fast_linear_to_srgb; + + // Image metrics + + class image_metrics + { + public: + // TODO: Add ssim + uint32_t m_width, m_height; + double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; + bool m_has_neg, m_hf_mag_overflow, m_any_abnormal; + uint64_t m_sum_a, m_sum_b; + + image_metrics() + { + clear(); + } + + void clear() + { + m_width = 0; + m_height = 0; + m_max = 0; + m_mean = 0; + m_mean_squared = 0; + m_rms = 0; + m_psnr = 0; + m_ssim = 0; + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; + m_sum_a = 0; + m_sum_b = 0; + } + + void print(const char *pPrefix = nullptr) + { + //fmt_printf("{}Max: {3.3} Mean: {3.3} RMS: {3.3} PSNR: {2.3} dB, Sums: {} {}, Dim: {}x{}\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_sum_a, m_sum_b, m_width, m_height); + fmt_printf("{}Max: {3.3} Mean: {3.3} RMS: {3.3} PSNR: {2.3} dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); + } + + void print_hp(const char* pPrefix = nullptr) + { + //fmt_printf("{}Max: {3.6} Mean: {3.6} RMS: {3.6} PSNR: {2.6} dB, Any Neg: {}, Half float overflow: {}, Any NaN/Inf: {}, Sums: {} {}, Dim: {}x{}\n", + // pPrefix ? 
pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal, m_sum_a, m_sum_b, m_width, m_height); + fmt_printf("{}Max: {3.6} Mean: {3.6} RMS: {3.6} PSNR: {2.6} dB, Any Neg: {}, Half float overflow: {}, Any NaN/Inf: {}\n", + pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); + } + + void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false); + void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); + void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); + void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false); + }; + + void print_image_metrics(const image& a, const image& b); + + // Image saving/loading/resampling + + bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr); + bool load_png(const char* pFilename, image& img); + inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } + + bool load_tga(const char* pFilename, image& img); + inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } + + bool load_qoi(const char* pFilename, image& img); + + bool load_jpg(const char *pFilename, image& img); + bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img); + inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } + + // Currently loads .PNG, .TGA, or .JPG + bool load_image(const char* pFilename, image& img); + inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } + + bool is_image_filename_hdr(const char* pFilename); + + // Supports .HDR and most 
(but not all) .EXR's (see TinyEXR). + bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f); + + inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f) + { + return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias); + } + + enum class hdr_image_type + { + cHITRGBAHalfFloat = 0, + cHITRGBAFloat = 1, + cHITPNGImage = 2, + cHITEXRImage = 3, + cHITHDRImage = 4, + cHITJPGImage = 5 + }; + + bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f); + + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); + uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); + + struct rgbe_header_info + { + std::string m_program; + + // Note no validation is done, either gamma or exposure may be 0. + double m_gamma; + bool m_has_gamma; + + double m_exposure; // watts/steradian/m^2. + bool m_has_exposure; + + void clear() + { + m_program.clear(); + m_gamma = 1.0f; + m_has_gamma = false; + m_exposure = 1.0f; + m_has_exposure = false; + } + }; + + bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info); + bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info); + + bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info); + bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info); + + bool read_exr(const char* pFilename, imagef& img, int& n_chans); + bool read_exr(const void* pMem, size_t mem_size, imagef& img); + + enum + { + WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. 
methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic + WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats + WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block) + }; + + // Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images. + bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags); + + enum + { + cImageSaveGrayscale = 1, + cImageSaveIgnoreAlpha = 2 + }; + + bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); + inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } + + bool save_qoi(const char* pFilename, const image& img, uint32_t qoi_colorspace = 0); + inline bool save_qoi(const std::string& filename, const image& img, uint32_t qoi_colorspace = 0) { return save_qoi(filename.c_str(), img, qoi_colorspace); } + + bool read_file_to_vec(const char* pFilename, uint8_vec& data); + bool read_file_to_data(const char* pFilename, void *pData, size_t len); + + bool write_data_to_file(const char* pFilename, const void* pData, size_t len); + + inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? 
write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } + + bool image_resample(const image &src, image &dst, bool srgb = false, + const char *pFilter = "lanczos4", float filter_scale = 1.0f, + bool wrapping = false, + uint32_t first_comp = 0, uint32_t num_comps = 4, float filter_scale_y = -1.0f); + + bool image_resample(const imagef& src, imagef& dst, + const char* pFilter = "lanczos4", float filter_scale = 1.0f, + bool wrapping = false, + uint32_t first_comp = 0, uint32_t num_comps = 4); + + // Timing + + typedef uint64_t timer_ticks; + + class interval_timer + { + public: + interval_timer(); + + void start(); + void stop(); + + double get_elapsed_secs() const; + inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); } + + static void init(); + static inline timer_ticks get_ticks_per_sec() { return g_freq; } + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } + + private: + static timer_ticks g_init_ticks, g_freq; + static double g_timer_freq; + + timer_ticks m_start_time, m_stop_time; + + bool m_started, m_stopped; + }; + + inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); } + + inline FILE *fopen_safe(const char *pFilename, const char *pMode) + { +#ifdef _WIN32 + FILE *pFile = nullptr; + fopen_s(&pFile, pFilename, pMode); + return pFile; +#else + return fopen(pFilename, pMode); +#endif + } + + void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); + + const uint32_t cPixelBlockWidth = 4; + const uint32_t cPixelBlockHeight = 4; + const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight; + + struct pixel_block + { + color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] + + inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && 
(y < cPixelBlockHeight)); return m_pixels[y][x]; } + inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + + inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; } + inline color_rgba* get_ptr() { return &m_pixels[0][0]; } + + inline void clear() { clear_obj(*this); } + + inline bool operator== (const pixel_block& rhs) const + { + return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; + } + }; + typedef basisu::vector pixel_block_vec; + + struct pixel_block_hdr + { + vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] + + inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + + inline const vec4F* get_ptr() const { return &m_pixels[0][0]; } + inline vec4F* get_ptr() { return &m_pixels[0][0]; } + + inline void clear() { clear_obj(*this); } + + inline bool operator== (const pixel_block& rhs) const + { + return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; + } + }; + typedef basisu::vector pixel_block_hdr_vec; + + void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure, bool add_noise = false, bool per_component = true, bool luma_scaling = false); + bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img); + bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img); + + // Intersection + enum eClear { cClear = 0 }; + enum eInitExpand { cInitExpand = 0 }; + enum eIdentity { cIdentity = 0 }; + + template + class ray + { + public: + typedef vector_type vector_t; + typedef typename vector_type::scalar_type scalar_type; + + inline ray() { } + inline ray(eClear) { clear(); } + inline ray(const vector_type& origin, const vector_type& direction) : 
m_origin(origin), m_direction(direction) { } + + inline void clear() + { + m_origin.clear(); + m_direction.clear(); + } + + inline const vector_type& get_origin(void) const { return m_origin; } + inline void set_origin(const vector_type& origin) { m_origin = origin; } + + inline const vector_type& get_direction(void) const { return m_direction; } + inline void set_direction(const vector_type& direction) { m_direction = direction; } + + inline void set_endpoints(const vector_type& start, const vector_type& end) + { + m_origin = start; + + m_direction = end - start; + m_direction.normalize_in_place(); + } + + inline vector_type eval(scalar_type t) const + { + return m_origin + m_direction * t; + } + + private: + vector_type m_origin; + vector_type m_direction; + }; + + typedef ray ray2F; + typedef ray ray3F; + + template + class vec_interval + { + public: + enum { N = T::num_elements }; + typedef typename T::scalar_type scalar_type; + + inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; } + inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; } + + inline vec_interval() { } + inline vec_interval(eClear) { clear(); } + inline vec_interval(eInitExpand) { init_expand(); } + + inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); } + + inline void init_expand() + { + m_bounds[0].set(1e+30f, 1e+30f, 1e+30f); + m_bounds[1].set(-1e+30f, -1e+30f, -1e+30f); + } + + inline vec_interval expand(const T& p) + { + for (uint32_t c = 0; c < N; c++) + { + if (p[c] < m_bounds[0][c]) + m_bounds[0][c] = p[c]; + + if (p[c] > m_bounds[1][c]) + m_bounds[1][c] = p[c]; + } + + return *this; + } + + inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; } + inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; } + + const T& get_low() const { return m_bounds[0]; } + T& get_low() { return m_bounds[0]; } + + const T& get_high() const { return m_bounds[1]; } + T& get_high() { return 
m_bounds[1]; } + + scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; } + + bool contains(const T& p) const + { + const T& low = get_low(), high = get_high(); + + for (uint32_t i = 0; i < N; i++) + { + if (p[i] < low[i]) + return false; + + if (p[i] > high[i]) + return false; + } + return true; + } + + private: + T m_bounds[2]; + }; + + typedef vec_interval vec_interval1F; + typedef vec_interval vec_interval2F; + typedef vec_interval vec_interval3F; + typedef vec_interval vec_interval4F; + + typedef vec_interval1F aabb1F; + typedef vec_interval2F aabb2F; + typedef vec_interval3F aabb3F; + + namespace intersection + { + enum result + { + cBackfacing = -1, + cFailure = 0, + cSuccess, + cParallel, + cInside, + }; + + // Returns cInside, cSuccess, or cFailure. + // Algorithm: Graphics Gems 1 + template + result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + enum + { + cNumDim = vector_type::num_elements, + cRight = 0, + cLeft = 1, + cMiddle = 2 + }; + + bool inside = true; + int quadrant[cNumDim]; + scalar_type candidate_plane[cNumDim]; + + for (int i = 0; i < cNumDim; i++) + { + if (ray.get_origin()[i] < box[0][i]) + { + quadrant[i] = cLeft; + candidate_plane[i] = box[0][i]; + inside = false; + } + else if (ray.get_origin()[i] > box[1][i]) + { + quadrant[i] = cRight; + candidate_plane[i] = box[1][i]; + inside = false; + } + else + { + quadrant[i] = cMiddle; + } + } + + if (inside) + { + coord = ray.get_origin(); + t = 0.0f; + return cInside; + } + + scalar_type max_t[cNumDim]; + for (int i = 0; i < cNumDim; i++) + { + if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) + max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; + else + max_t[i] = -1.0f; + } + + int which_plane = 0; + for (int i = 1; i < cNumDim; i++) + if (max_t[which_plane] < max_t[i]) + which_plane = i; + + if (max_t[which_plane] < 0.0f) + return cFailure; + + for (int i = 0; i < 
cNumDim; i++) + { + if (i != which_plane) + { + coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; + + if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) + return cFailure; + } + else + { + coord[i] = candidate_plane[i]; + } + + assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]); + } + + t = max_t[which_plane]; + return cSuccess; + } + + template + result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + if (!box.contains(ray.get_origin())) + { + started_within = false; + return ray_aabb(coord, t, ray, box); + } + + started_within = true; + + typename vector_type::T diag_dist = box.diagonal_length() * 1.5f; + ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction()); + + result res(ray_aabb(coord, t, outside_ray, box)); + if (res != cSuccess) + return res; + + t = basisu::maximum(0.0f, diag_dist - t); + return cSuccess; + } + + } // intersect + + // This float->half conversion matches how "F32TO16" works on Intel GPU's. + // Input cannot be negative, Inf or Nan. + inline basist::half_float float_to_half_non_neg_no_nan_inf(float val) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF; + int e = 0, m = 0; + + assert(((fi.i >> 31) == 0) && (flt_e != 0xFF)); + + // not zero or denormal + if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + m = (int)lrintf((1 << 24) * fabsf(fi.f)); + else + { + e = new_exp + 15; + m = (int)lrintf(flt_m * (1.0f / ((float)(1 << 13)))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + basist::half_float result = (basist::half_float)((e << 10) | m); + return result; + } + + union fu32 + { + uint32_t u; + float f; + }; + + // Supports positive and denormals only. No NaN or Inf. 
+ BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) + { + assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h)); + + // add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127) + static const fu32 K = { 0x77800000 }; + + fu32 o; + o.u = h << 13; + o.f *= K.f; + + return o.f; + } + + // Positive, negative, or denormals. No NaN or Inf. Clamped to MAX_HALF_FLOAT. + inline basist::half_float fast_float_to_half_trunc_no_nan_or_inf(float f) + { + assert(!isnan(f) && !isinf(f)); + + // Sutract 112 from the exponent, to change the bias from 127 to 15. + static const fu32 g_f_to_h{ 0x7800000 }; + + fu32 fu; + + fu.f = minimum((float)basist::MAX_HALF_FLOAT, fabsf(f)) * g_f_to_h.f; + + return (basist::half_float)(((fu.u >> (23 - 10)) & 0x7FFF) | ((f < 0.0f) ? 0x8000 : 0)); + } + + inline basist::half_float fast_float_to_half_trunc_no_clamp_neg_nan_or_inf(float f) + { + assert(!isnan(f) && !isinf(f)); + assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT)); + + // Sutract 112 from the exponent, to change the bias from 127 to 15. + static const fu32 g_f_to_h{ 0x7800000 }; + + fu32 fu; + + fu.f = f * g_f_to_h.f; + + return (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF); + } + + inline basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f) + { + assert(!isnan(f) && !isinf(f)); + assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT)); + + // Sutract 112 from the exponent, to change the bias from 127 to 15. 
+ static const fu32 g_f_to_h{ 0x7800000 }; + + fu32 fu; + + fu.f = f * g_f_to_h.f; + + uint32_t h = (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF); + + // round to even or nearest + uint32_t mant = fu.u & 8191; // examine lowest 13 bits + uint32_t inc = (mant > 4096) | ((mant == 4096) & (h & 1)); + h += inc; + + if (h > basist::MAX_HALF_FLOAT_AS_INT_BITS) + h = basist::MAX_HALF_FLOAT_AS_INT_BITS; + + return (basist::half_float)h; + } + + bool arith_test(); + + void set_image_alpha(image& img, uint32_t a); + + void create_bc7_debug_images( + uint32_t width, uint32_t height, + const void* pBlocks, + const char* pFilename_prefix); + + struct tri2 + { + vec2F p0, p1, p2; + vec2F t0, t1, t2; + color_rgba c0, c1, c2; + }; + + // simple non-perspective correct triangle rasterizer with texture mapping, useful for generating randomized test data + void draw_tri2(image& dst, const image* pTex, const tri2& tri, bool alpha_blend); + + void set_num_wasi_threads(uint32_t num_threads); + int get_num_hardware_threads(); + +} // namespace basisu + +#include "basisu_math.h" diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_etc.h b/Source/ThirdParty/basis_universal/encoder/basisu_etc.h new file mode 100644 index 000000000..1001d0c74 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_etc.h @@ -0,0 +1,1181 @@ +// basis_etc.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "../transcoder/basisu.h" +#include "basisu_enc.h" + +namespace basisu +{ + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + cETC1SelectorBits = 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per component bases) + cETC1AbsColorCompNumBits = 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + + extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; + extern const uint8_t g_etc1_to_selector_index[cETC1SelectorValues]; + extern const uint8_t g_selector_index_to_etc1[cETC1SelectorValues]; + + struct etc_coord2 + { + uint8_t m_x, m_y; + }; + extern const 
etc_coord2 g_etc1_pixel_coords[2][2][8]; // [flipped][subblock][subblock_pixel] + extern const uint32_t g_etc1_pixel_indices[2][2][8]; // [flipped][subblock][subblock_pixel] + + struct etc_block + { + // big endian uint64: + // bit ofs: 56 48 40 32 24 16 8 0 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + union + { + uint64_t m_uint64; + + uint8_t m_bytes[8]; + }; + + inline void clear() + { + assert(sizeof(*this) == 8); + clear_obj(*this); + } + + inline uint64_t get_all_bits() const + { + return read_be64(&m_uint64); + } + + inline uint32_t get_general_bits(uint32_t ofs, uint32_t num) const + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + return (uint32_t)(read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); + } + + inline void set_general_bits(uint32_t ofs, uint32_t num, uint32_t bits) + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + + uint64_t x = read_be64(&m_uint64); + uint64_t msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs); + x &= ~msk; + x |= (static_cast(bits) << static_cast(ofs)); + write_be64(&m_uint64, x); + } + + inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const + { + assert((ofs + num) <= 64U); + assert(num && (num <= 8U)); + assert((ofs >> 3) == ((ofs + num - 1) >> 3)); + const uint32_t byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); + } + + inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits) + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + assert((ofs >> 3) == ((ofs + num - 1) >> 3)); + assert(bits < (1U << num)); + const uint32_t byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + const uint32_t mask = (1 << num) - 1; + m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); + m_bytes[byte_ofs] |= (bits << byte_bit_ofs); + } + + // false = left/right subblocks + // true = upper/lower subblocks + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 
0; + } + + inline void set_flip_bit(bool flip) + { + m_bytes[3] &= ~1; + m_bytes[3] |= static_cast(flip); + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + + inline void set_diff_bit(bool diff) + { + m_bytes[3] &= ~2; + m_bytes[3] |= (static_cast(diff) << 1); + } + + // Returns intensity modifier table (0-7) used by subblock subblock_id. + // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) + inline uint32_t get_inten_table(uint32_t subblock_id) const + { + assert(subblock_id < 2); + const uint32_t ofs = subblock_id ? 2 : 5; + return (m_bytes[3] >> ofs) & 7; + } + + // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) + inline void set_inten_table(uint32_t subblock_id, uint32_t t) + { + assert(subblock_id < 2); + assert(t < 8); + const uint32_t ofs = subblock_id ? 2 : 5; + m_bytes[3] &= ~(7 << ofs); + m_bytes[3] |= (t << ofs); + } + + inline void set_inten_tables_etc1s(uint32_t t) + { + set_inten_table(0, t); + set_inten_table(1, t); + } + + inline bool is_etc1s() const + { + if (get_inten_table(0) != get_inten_table(1)) + return false; + + if (get_diff_bit()) + { + if (get_delta3_color() != 0) + return false; + } + else + { + if (get_base4_color(0) != get_base4_color(1)) + return false; + } + + return true; + } + + // Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector()) + inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const + { + assert((x | y) < 4); + + const uint32_t bit_index = x * 4 + y; + const uint32_t byte_bit_ofs = bit_index & 7; + const uint8_t *p = &m_bytes[7 - (bit_index >> 3)]; + const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1; + const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1; + const uint32_t val = lsb | (msb << 1); + + return val; + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
+ inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + return g_etc1_to_selector_index[get_raw_selector(x, y)]; + } + + // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + assert((x | y | val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t *p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t etc1_val = g_selector_index_to_etc1[val]; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + // Selector "etc1_val" ranges from 0-3 and is a direct (raw) ETC1 selector. + inline void set_raw_selector(uint32_t x, uint32_t y, uint32_t etc1_val) + { + assert((x | y | etc1_val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t* p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + inline uint32_t get_raw_selector_bits() const + { + return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24); + } + + inline void set_raw_selector_bits(uint32_t bits) + { + m_bytes[4] = static_cast(bits); + m_bytes[5] = static_cast(bits >> 8); + m_bytes[6] = static_cast(bits >> 16); + m_bytes[7] = static_cast(bits >> 24); + } + + inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3) + { + m_bytes[4] = byte0; + m_bytes[5] = byte1; + m_bytes[6] = byte2; + m_bytes[7] = byte3; + } + + inline void set_base4_color(uint32_t idx, uint16_t c) + { + if (idx) + { + set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); + 
set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); + } + else + { + set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); + } + } + + inline uint16_t get_base4_color(uint32_t idx) const + { + uint32_t r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 8U)); + } + + inline void set_base5_color(uint16_t c) + { + set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); + set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); + set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); + } + + inline uint16_t get_base5_color() const + { + const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); + const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); + const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); + return static_cast(b | (g << 5U) | (r << 10U)); + } + + void set_delta3_color(uint16_t c) + { + set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); + set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); + set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); + } + + inline uint16_t get_delta3_color() const + { + const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + uint64_t determine_selectors(const color_rgba* pSource_pixels, bool perceptual, uint32_t begin_subblock = 0, uint32_t end_subblock = 2) + { + 
uint64_t total_error = 0; + + for (uint32_t subblock = begin_subblock; subblock < end_subblock; subblock++) + { + color_rgba block_colors[4]; + get_block_colors(block_colors, subblock); + + if (get_flip_bit()) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t best_selector = 0; + uint64_t best_error = UINT64_MAX; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(perceptual, block_colors[s], pSource_pixels[x + (subblock * 2 + y) * 4], false); + if (err < best_error) + { + best_error = err; + best_selector = s; + } + } + + set_selector(x, subblock * 2 + y, best_selector); + + total_error += best_error; + } + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + uint32_t best_selector = 0; + uint64_t best_error = UINT64_MAX; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(perceptual, block_colors[s], pSource_pixels[(subblock * 2) + x + y * 4], false); + if (err < best_error) + { + best_error = err; + best_selector = s; + } + } + + set_selector(subblock * 2 + x, y, best_selector); + + total_error += best_error; + } + } + } + } + + return total_error; + } + + color_rgba get_block_color(uint32_t subblock_index, bool scaled) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), scaled); + else + unpack_color5(b, get_base5_color(), scaled); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), scaled); + } + + return b; + } + + uint32_t get_subblock_index(uint32_t x, uint32_t y) const + { + if (get_flip_bit()) + return y >= 2; + else + return x >= 2; + } + + bool get_block_colors(color_rgba* pBlock_colors, uint32_t subblock_index) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = 
unpack_color4(get_base4_color(subblock_index), true); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + bool dc = false; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0], dc), clamp255(b.g + pInten_table[0], dc), clamp255(b.b + pInten_table[0], dc), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1], dc), clamp255(b.g + pInten_table[1], dc), clamp255(b.b + pInten_table[1], dc), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2], dc), clamp255(b.g + pInten_table[2], dc), clamp255(b.b + pInten_table[2], dc), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3], dc), clamp255(b.g + pInten_table[3], dc), clamp255(b.b + pInten_table[3], dc), 255); + + return dc; + } + + void get_block_colors_etc1s(color_rgba* pBlock_colors) const + { + color_rgba b; + + unpack_color5(b, get_base5_color(), true); + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(0)]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + static void get_block_colors_etc1s(color_rgba* pBlock_colors, const color_rgba &base5_color, uint32_t inten_table) + { + color_rgba b; + b.r = (base5_color.r << 3U) | (base5_color.r >> 2U); + b.g = (base5_color.g << 3U) | (base5_color.g >> 2U); + b.b = (base5_color.b << 3U) | (base5_color.b >> 2U); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r 
+ pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + void get_block_color(color_rgba& color, uint32_t subblock_index, uint32_t selector_index) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + color.set(clamp255(b.r + pInten_table[selector_index]), clamp255(b.g + pInten_table[selector_index]), clamp255(b.b + pInten_table[selector_index]), 255); + } + + bool get_block_low_high_colors(color_rgba* pBlock_colors, uint32_t subblock_index) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + bool dc = false; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0], dc), clamp255(b.g + pInten_table[0], dc), clamp255(b.b + pInten_table[0], dc), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[3], dc), clamp255(b.g + pInten_table[3], dc), clamp255(b.b + pInten_table[3], dc), 255); + + return dc; + } + + static void get_block_colors5(color_rgba *pBlock_colors, const color_rgba &base_color5, uint32_t inten_table, bool scaled = false) + { + color_rgba b(base_color5); + + if (!scaled) + { + b.r = (b.r << 3) | (b.r >> 2); + b.g = (b.g << 3) | (b.g >> 2); + b.b = (b.b << 
3) | (b.b >> 2); + } + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + static void get_block_colors4(color_rgba *pBlock_colors, const color_rgba &base_color4, uint32_t inten_table, bool scaled = false) + { + color_rgba b(base_color4); + + if (!scaled) + { + b.r = (b.r << 4) | b.r; + b.g = (b.g << 4) | b.g; + b.b = (b.b << 4) | b.b; + } + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + uint64_t evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index = -1) const; + void get_subblock_pixels(color_rgba* pPixels, int subblock_index = -1) const; + + void get_selector_range(uint32_t& low, uint32_t& high) const + { + low = 3; + high = 0; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t s = get_selector(x, y); + low = minimum(low, s); + high = maximum(high, s); + } + } + } + + void set_block_color4(const color_rgba &c0_unscaled, const color_rgba 
&c1_unscaled) + { + set_diff_bit(false); + + set_base4_color(0, pack_color4(c0_unscaled, false)); + set_base4_color(1, pack_color4(c1_unscaled, false)); + } + + void set_block_color5(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + set_delta3_color(pack_delta3(dr, dg, db)); + } + + void set_block_color5_etc1s(const color_rgba &c_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c_unscaled, false)); + set_delta3_color(pack_delta3(0, 0, 0)); + } + + bool set_block_color5_check(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) || + ((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) || + ((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax))) + return false; + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } + + bool set_block_color5_clamp(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + dr = clamp(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + dg = clamp(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + db = clamp(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } + color_rgba get_selector_color(uint32_t x, uint32_t y, uint32_t s) const + { + color_rgba block_colors[4]; + + get_block_colors(block_colors, get_subblock_index(x, y)); + + return 
block_colors[s]; + } + + // Base color 5 + static uint16_t pack_color5(const color_rgba& color, bool scaled, uint32_t bias = 127U); + static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U); + + static color_rgba unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha = 255U); + static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color, bool scaled); + static void unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled); + + static bool unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha = 255U); + static bool unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha = 255U); + + // Delta color 3 + // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static uint16_t pack_delta3(const color_rgba_i16& color); + static uint16_t pack_delta3(int r, int g, int b); + + // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static color_rgba_i16 unpack_delta3(uint16_t packed_delta3); + static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3); + + static bool try_pack_color5_delta3(const color_rgba *pColor5_unscaled) + { + int dr = pColor5_unscaled[1].r - pColor5_unscaled[0].r; + int dg = pColor5_unscaled[1].g - pColor5_unscaled[0].g; + int db = pColor5_unscaled[1].b - pColor5_unscaled[0].b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + return false; + + return true; + } + + // Abs color 4 + static uint16_t pack_color4(const color_rgba& color, bool scaled, uint32_t bias = 127U); + static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U); + + static color_rgba unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha = 255U); + static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, 
uint16_t packed_color4, bool scaled); + + // subblock colors + static void get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx); + static bool get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx); + static void get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx); + + static inline void unscaled_to_scaled_color(color_rgba& dst, const color_rgba& src, bool color4) + { + if (color4) + { + dst.r = src.r | (src.r << 4); + dst.g = src.g | (src.g << 4); + dst.b = src.b | (src.b << 4); + } + else + { + dst.r = (src.r >> 2) | (src.r << 3); + dst.g = (src.g >> 2) | (src.g << 3); + dst.b = (src.b >> 2) | (src.b << 3); + } + dst.a = src.a; + } + + private: + static uint8_t clamp255(int x, bool &did_clamp) + { + if (x < 0) + { + did_clamp = true; + return 0; + } + else if (x > 255) + { + did_clamp = true; + return 255; + } + + return static_cast(x); + } + + static uint8_t clamp255(int x) + { + if (x < 0) + return 0; + else if (x > 255) + return 255; + + return static_cast(x); + } + }; + + typedef basisu::vector etc_block_vec; + + // Returns false if the unpack fails (could be bogus data or ETC2) + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha = false); + + enum basis_etc_quality + { + cETCQualityFast, + cETCQualityMedium, + cETCQualitySlow, + cETCQualityUber, + cETCQualityTotal, + }; + + struct basis_etc1_pack_params + { + basis_etc_quality m_quality; + bool m_perceptual; + bool m_cluster_fit; + bool m_force_etc1s; + bool m_use_color4; + float m_flip_bias; + + inline basis_etc1_pack_params() + { + clear(); + } + + void clear() + { + m_quality = cETCQualitySlow; + m_perceptual = true; + m_cluster_fit = true; + m_force_etc1s = false; + m_use_color4 = true; + m_flip_bias = 0.0f; + } + }; + + struct etc1_solution_coordinates + { + inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), + m_inten_table(0), + 
m_color4(false) + { + } + + inline etc1_solution_coordinates(uint32_t r, uint32_t g, uint32_t b, uint32_t inten_table, bool color4) : + m_unscaled_color((uint8_t)r, (uint8_t)g, (uint8_t)b, 255), + m_inten_table((uint8_t)inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const color_rgba& c, uint32_t inten_table, bool color4) : + m_unscaled_color(c), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { + *this = other; + } + + inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) + { + m_unscaled_color = rhs.m_unscaled_color; + m_inten_table = rhs.m_inten_table; + m_color4 = rhs.m_color4; + return *this; + } + + inline void clear() + { + m_unscaled_color.clear(); + m_inten_table = 0; + m_color4 = false; + } + + inline void init(const color_rgba& c, uint32_t inten_table, bool color4) + { + m_unscaled_color = c; + m_inten_table = inten_table; + m_color4 = color4; + } + + inline color_rgba get_scaled_color() const + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + return color_rgba((uint8_t)br, (uint8_t)bg, (uint8_t)bb, 255); + } + + // returns true if anything was clamped + inline void get_block_colors(color_rgba* pBlock_colors) + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb 
= (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + const int* pInten_table = g_etc1_inten_tables[m_inten_table]; + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0], 255); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1], 255); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2], 255); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3], 255); + } + + color_rgba m_unscaled_color; + uint32_t m_inten_table; + bool m_color4; + }; + + class etc1_optimizer + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(etc1_optimizer); + + public: + etc1_optimizer() + { + clear(); + } + + void clear() + { + m_pParams = nullptr; + m_pResult = nullptr; + m_pSorted_luma = nullptr; + m_pSorted_luma_indices = nullptr; + } + + struct params; + + typedef bool(*evaluate_solution_override_func)(uint64_t &error, const params &p, const color_rgba* pBlock_colors, const uint8_t* pSelectors, const etc1_solution_coordinates& coords); + + struct params : basis_etc1_pack_params + { + params() + { + clear(); + } + + params(const basis_etc1_pack_params& base_params) + { + clear_optimizer_params(); + + *static_cast(this) = base_params; + } + + void clear() + { + clear_optimizer_params(); + } + + void clear_optimizer_params() + { + basis_etc1_pack_params::clear(); + + m_num_src_pixels = 0; + m_pSrc_pixels = 0; + + m_use_color4 = false; + static const int s_default_scan_delta[] = { 0 }; + m_pScan_deltas = s_default_scan_delta; + m_scan_delta_size = 1; + + m_base_color5.clear(); + m_constrain_against_base_color5 = false; + + m_refinement = true; + + m_pForce_selectors = nullptr; + } + + uint32_t m_num_src_pixels; + const color_rgba* m_pSrc_pixels; + + bool m_use_color4; + const int* m_pScan_deltas; + uint32_t m_scan_delta_size; + + color_rgba m_base_color5; + bool m_constrain_against_base_color5; + + bool m_refinement; + + const uint8_t* m_pForce_selectors; + 
}; + + struct results + { + uint64_t m_error; + color_rgba m_block_color_unscaled; + uint32_t m_block_inten_table; + uint32_t m_n; + uint8_t* m_pSelectors; + bool m_block_color4; + + inline results& operator= (const results& rhs) + { + m_block_color_unscaled = rhs.m_block_color_unscaled; + m_block_color4 = rhs.m_block_color4; + m_block_inten_table = rhs.m_block_inten_table; + m_error = rhs.m_error; + memcpy(m_pSelectors, rhs.m_pSelectors, minimum(rhs.m_n, m_n)); + return *this; + } + }; + + void init(const params& params, results& result); + bool compute(); + + const params* get_params() const { return m_pParams; } + + private: + struct potential_solution + { + potential_solution() : m_coords(), m_error(UINT64_MAX), m_valid(false) + { + } + + etc1_solution_coordinates m_coords; + basisu::vector m_selectors; + uint64_t m_error; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_selectors.resize(0); + m_error = UINT64_MAX; + m_valid = false; + } + + bool are_selectors_all_equal() const + { + if (!m_selectors.size()) + return false; + const uint32_t s = m_selectors[0]; + for (uint32_t i = 1; i < m_selectors.size(); i++) + if (m_selectors[i] != s) + return false; + return true; + } + }; + + const params* m_pParams; + results* m_pResult; + + int m_limit; + + vec3F m_avg_color; + int m_br, m_bg, m_bb; + int m_max_comp_spread; + basisu::vector m_luma; + basisu::vector m_sorted_luma; + basisu::vector m_sorted_luma_indices; + const uint32_t* m_pSorted_luma_indices; + uint32_t* m_pSorted_luma; + + basisu::vector m_selectors; + basisu::vector m_best_selectors; + + potential_solution m_best_solution; + potential_solution m_trial_solution; + basisu::vector m_temp_selectors; + + enum { cSolutionsTriedHashBits = 10, cTotalSolutionsTriedHashSize = 1 << cSolutionsTriedHashBits, cSolutionsTriedHashMask = cTotalSolutionsTriedHashSize - 1 }; + uint8_t m_solutions_tried[cTotalSolutionsTriedHashSize / 8]; + + void get_nearby_inten_tables(uint32_t idx, int &first_inten_table, 
int &last_inten_table) + { + first_inten_table = maximum(idx - 1, 0); + last_inten_table = minimum(cETC1IntenModifierValues, idx + 1); + } + + bool check_for_redundant_solution(const etc1_solution_coordinates& coords); + bool evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + + inline bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_quality >= cETCQualityMedium) + return evaluate_solution_slow(coords, trial_solution, pBest_solution); + else + return evaluate_solution_fast(coords, trial_solution, pBest_solution); + } + + void refine_solution(uint32_t max_refinement_trials); + void compute_internal_neighborhood(int scan_r, int scan_g, int scan_b); + void compute_internal_cluster_fit(uint32_t total_perms_to_try); + }; + + struct pack_etc1_block_context + { + etc1_optimizer m_optimizer; + }; + + void pack_etc1_solid_color_init(); + uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor); + + // ETC EAC + extern const int8_t g_etc2_eac_tables[16][8]; + extern const int8_t g_etc2_eac_tables8[16][8]; + + const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7; + + struct eac_a8_block + { + uint16_t m_base : 8; + uint16_t m_table : 4; + uint16_t m_multiplier : 4; + + uint8_t m_selectors[6]; + + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const + { + assert((x < 4) && (y < 4)); + return static_cast((selector_bits >> (45 - (y + x * 4) * 3)) & 7); + } + + inline uint64_t get_selector_bits() const + { + uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | ((uint64_t)m_selectors[2] << 24) | ((uint64_t)m_selectors[3] << 16) | 
((uint64_t)m_selectors[4] << 8) | m_selectors[5]; + return pixels; + } + + inline void set_selector_bits(uint64_t pixels) + { + m_selectors[0] = (uint8_t)(pixels >> 40); + m_selectors[1] = (uint8_t)(pixels >> 32); + m_selectors[2] = (uint8_t)(pixels >> 24); + m_selectors[3] = (uint8_t)(pixels >> 16); + m_selectors[4] = (uint8_t)(pixels >> 8); + m_selectors[5] = (uint8_t)(pixels); + } + + void set_selector(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 8)); + + const uint32_t ofs = 45 - (y + x * 4) * 3; + + uint64_t pixels = get_selector_bits(); + + pixels &= ~(7ULL << ofs); + pixels |= (static_cast(s) << ofs); + + set_selector_bits(pixels); + } + }; + + struct etc2_rgba_block + { + eac_a8_block m_alpha; + etc_block m_rgb; + }; + + struct pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + uint8_vec m_selectors; + uint8_vec m_selectors_temp; + }; + + uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_frontend.h b/Source/ThirdParty/basis_universal/encoder/basisu_frontend.h new file mode 100644 index 000000000..a5aadb34a --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_frontend.h @@ -0,0 +1,355 @@ +// basisu_frontend.h +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_enc.h" +#include "basisu_etc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_transcoder.h" + +namespace basisu +{ + struct opencl_context; + typedef opencl_context* opencl_context_ptr; + + struct vec2U + { + uint32_t m_comps[2]; + + vec2U() { } + vec2U(uint32_t a, uint32_t b) { set(a, b); } + + void set(uint32_t a, uint32_t b) { m_comps[0] = a; m_comps[1] = b; } + + uint32_t operator[] (uint32_t i) const { assert(i < 2); return m_comps[i]; } + uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; } + }; + + // rg [11/25/25] - The command line tool defaults to ETC1S level 1, but the API 2. Changing this breaks backwards compatibility for anyone using the API and our test suite. 
+ const uint32_t BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL = 2; + + const uint32_t BASISU_MAX_ETC1S_COMPRESSION_LEVEL = 6; + + class basisu_frontend + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_frontend); + + public: + + basisu_frontend() : + m_total_blocks(0), + m_total_pixels(0), + m_endpoint_refinement(false), + m_use_hierarchical_endpoint_codebooks(false), + m_use_hierarchical_selector_codebooks(false), + m_num_endpoint_codebook_iterations(0), + m_num_selector_codebook_iterations(0), + m_opencl_failed(false) + { + } + + enum + { + cMaxEndpointClusters = 16128, + + cMaxSelectorClusters = 16128, + }; + + struct params + { + params() : + m_num_source_blocks(0), + m_pSource_blocks(NULL), + m_max_endpoint_clusters(256), + m_max_selector_clusters(256), + m_compression_level(BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL), + m_perceptual(true), + m_debug_stats(false), + m_debug_images(false), + m_dump_endpoint_clusterization(true), + m_validate(false), + m_multithreaded(false), + m_disable_hierarchical_endpoint_codebooks(false), + m_tex_type(basist::cBASISTexType2D), + m_pOpenCL_context(nullptr), + m_pJob_pool(nullptr) + { + } + + uint32_t m_num_source_blocks; + pixel_block *m_pSource_blocks; + + uint32_t m_max_endpoint_clusters; + uint32_t m_max_selector_clusters; + + uint32_t m_compression_level; + + bool m_perceptual; + bool m_debug_stats; + bool m_debug_images; + bool m_dump_endpoint_clusterization; + bool m_validate; + bool m_multithreaded; + bool m_disable_hierarchical_endpoint_codebooks; + + basist::basis_texture_type m_tex_type; + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; + + opencl_context_ptr m_pOpenCL_context; + + job_pool *m_pJob_pool; + }; + + bool init(const params &p); + + bool compress(); + + const params &get_params() const { return m_params; } + + const pixel_block &get_source_pixel_block(uint32_t i) const { return m_source_blocks[i]; } + + // RDO output blocks + uint32_t get_total_output_blocks() const { return 
static_cast(m_encoded_blocks.size()); } + + const etc_block &get_output_block(uint32_t block_index) const { return m_encoded_blocks[block_index]; } + const etc_block_vec &get_output_blocks() const { return m_encoded_blocks; } + + // "Best" ETC1S blocks + const etc_block &get_etc1s_block(uint32_t block_index) const { return m_etc1_blocks_etc1s[block_index]; } + + // Per-block flags + bool get_diff_flag(uint32_t block_index) const { return m_encoded_blocks[block_index].get_diff_bit(); } + + // Endpoint clusters + uint32_t get_total_endpoint_clusters() const { return static_cast(m_endpoint_clusters.size()); } + uint32_t get_subblock_endpoint_cluster_index(uint32_t block_index, uint32_t subblock_index) const { return m_block_endpoint_clusters_indices[block_index][subblock_index]; } + + const color_rgba &get_endpoint_cluster_unscaled_color(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_unscaled[individual_mode]; } + uint32_t get_endpoint_cluster_inten_table(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_inten_table[individual_mode]; } + + bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } + + // Selector clusters + uint32_t get_total_selector_clusters() const { return static_cast(m_selector_cluster_block_indices.size()); } + uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } + const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } + + // Returns block indices using each selector cluster + const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } + + void 
dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); + + void reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); + + bool get_opencl_failed() const { return m_opencl_failed; } + + private: + params m_params; + uint32_t m_total_blocks; + uint32_t m_total_pixels; + + bool m_endpoint_refinement; + bool m_use_hierarchical_endpoint_codebooks; + bool m_use_hierarchical_selector_codebooks; + + uint32_t m_num_endpoint_codebook_iterations; + uint32_t m_num_selector_codebook_iterations; + + // Source pixels for each blocks + pixel_block_vec m_source_blocks; + + // The quantized ETC1S texture. + etc_block_vec m_encoded_blocks; + + // Quantized blocks after endpoint quant, but before selector quant + etc_block_vec m_orig_encoded_blocks; + + // Full quality ETC1S texture + etc_block_vec m_etc1_blocks_etc1s; + + typedef vec<6, float> vec6F; + + // Endpoint clusterizer + typedef tree_vector_quant vec6F_quantizer; + vec6F_quantizer m_endpoint_clusterizer; + + // For each endpoint cluster: An array of which subblock indices (block_index*2+subblock) are located in that cluster. + basisu::vector m_endpoint_clusters; + + // Array of subblock indices for each parent endpoint cluster + // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. + // As the endpoint clusters are manipulated this constraint gets broken. 
+ basisu::vector m_endpoint_parent_clusters; + + // Each block's parent endpoint cluster index + uint8_vec m_block_parent_endpoint_cluster; + + // Array of endpoint cluster indices for each parent endpoint cluster + basisu::vector m_endpoint_clusters_within_each_parent_cluster; + + struct endpoint_cluster_etc_params + { + endpoint_cluster_etc_params() + { + clear(); + } + + void clear() + { + clear_obj(m_color_unscaled); + clear_obj(m_inten_table); + clear_obj(m_color_error); + m_subblocks.clear(); + + clear_obj(m_color_used); + m_valid = false; + } + + // TODO: basisu doesn't use individual mode. + color_rgba m_color_unscaled[2]; // [use_individual_mode] + uint32_t m_inten_table[2]; + + uint64_t m_color_error[2]; + + uint_vec m_subblocks; + + bool m_color_used[2]; + + bool m_valid; + + bool operator== (const endpoint_cluster_etc_params &other) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_color_unscaled[i] != other.m_color_unscaled[i]) + return false; + } + + if (m_inten_table[0] != other.m_inten_table[0]) + return false; + if (m_inten_table[1] != other.m_inten_table[1]) + return false; + + return true; + } + + bool operator< (const endpoint_cluster_etc_params &other) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_color_unscaled[i] < other.m_color_unscaled[i]) + return true; + else if (m_color_unscaled[i] != other.m_color_unscaled[i]) + return false; + } + + if (m_inten_table[0] < other.m_inten_table[0]) + return true; + else if (m_inten_table[0] == other.m_inten_table[0]) + { + if (m_inten_table[1] < other.m_inten_table[1]) + return true; + } + + return false; + } + }; + + typedef basisu::vector cluster_subblock_etc_params_vec; + + // Each endpoint cluster's ETC1S parameters + cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; + + // The endpoint cluster index used by each ETC1 subblock. 
+ basisu::vector m_block_endpoint_clusters_indices; + + // The block(s) within each selector cluster + // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! + basisu::vector m_selector_cluster_block_indices; + + // The selector bits for each selector cluster. + basisu::vector m_optimized_cluster_selectors; + + // The block(s) within each parent selector cluster. + basisu::vector m_selector_parent_cluster_block_indices; + + // Each block's parent selector cluster + uint8_vec m_block_parent_selector_cluster; + + // Array of selector cluster indices for each parent selector cluster + basisu::vector m_selector_clusters_within_each_parent_cluster; + + // Each block's selector cluster index + basisu::vector m_block_selector_cluster_index; + + struct subblock_endpoint_quant_err + { + uint64_t m_total_err; + uint32_t m_cluster_index; + uint32_t m_cluster_subblock_index; + uint32_t m_block_index; + uint32_t m_subblock_index; + + bool operator< (const subblock_endpoint_quant_err &rhs) const + { + if (m_total_err < rhs.m_total_err) + return true; + else if (m_total_err == rhs.m_total_err) + { + if (m_block_index < rhs.m_block_index) + return true; + else if (m_block_index == rhs.m_block_index) + return m_subblock_index < rhs.m_subblock_index; + } + return false; + } + }; + + // The sorted subblock endpoint quant error for each endpoint cluster + basisu::vector m_subblock_endpoint_quant_err_vec; + + std::mutex m_lock; + + bool m_opencl_failed; + + //----------------------------------------------------------------------------- + + void init_etc1_images(); + bool init_global_codebooks(); + void init_endpoint_training_vectors(); + void dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors); + void generate_endpoint_clusters(); + void compute_endpoint_subblock_error_vec(); + void introduce_new_endpoint_clusters(); + void generate_endpoint_codebook(uint32_t step); + uint32_t 
refine_endpoint_clusterization(); + void eliminate_redundant_or_empty_endpoint_clusters(); + void generate_block_endpoint_clusters(); + void compute_endpoint_clusters_within_each_parent_cluster(); + void compute_selector_clusters_within_each_parent_cluster(); + void create_initial_packed_texture(); + void generate_selector_clusters(); + void create_optimized_selector_codebook(uint32_t iter); + void find_optimal_selector_clusters_for_each_block(); + uint32_t refine_block_endpoints_given_selectors(); + void finalize(); + bool validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const; + bool validate_output() const; + void introduce_special_selector_clusters(); + void optimize_selector_codebook(); + bool check_etc1s_constraints() const; + }; + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_gpu_texture.h b/Source/ThirdParty/basis_universal/encoder/basisu_gpu_texture.h new file mode 100644 index 000000000..bcfc9cb49 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_gpu_texture.h @@ -0,0 +1,184 @@ +// basisu_gpu_texture.h +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "../transcoder/basisu.h" +#include "../transcoder/basisu_astc_helpers.h" +#include "basisu_etc.h" + +namespace basisu +{ + // GPU texture "image" + class gpu_image + { + public: + enum { cMaxBlockSize = 12 }; + + gpu_image() + { + clear(); + } + + gpu_image(texture_format fmt, uint32_t width, uint32_t height) + { + init(fmt, width, height); + } + + void clear() + { + m_fmt = texture_format::cInvalidTextureFormat; + m_width = 0; + m_height = 0; + m_block_width = 0; + m_block_height = 0; + m_blocks_x = 0; + m_blocks_y = 0; + m_qwords_per_block = 0; + m_blocks.clear(); + } + + inline texture_format get_format() const { return m_fmt; } + inline bool is_hdr() const { return is_hdr_texture_format(m_fmt); } + inline bool is_ldr() const { return !is_hdr_texture_format(m_fmt); } + + // Width/height in pixels + inline uint32_t get_pixel_width() const { return m_width; } + inline uint32_t get_pixel_height() const { return m_height; } + + // Width/height in blocks, row pitch is assumed to be m_blocks_x. 
+ inline uint32_t get_blocks_x() const { return m_blocks_x; } + inline uint32_t get_blocks_y() const { return m_blocks_y; } + + // Size of each block in pixels + inline uint32_t get_block_width() const { return m_block_width; } + inline uint32_t get_block_height() const { return m_block_height; } + + inline uint32_t get_qwords_per_block() const { return m_qwords_per_block; } + inline uint32_t get_total_blocks() const { return m_blocks_x * m_blocks_y; } + inline uint32_t get_bytes_per_block() const { return get_qwords_per_block() * sizeof(uint64_t); } + inline uint32_t get_row_pitch_in_bytes() const { return get_bytes_per_block() * get_blocks_x(); } + + inline const uint64_vec &get_blocks() const { return m_blocks; } + + inline const uint64_t *get_ptr() const { return &m_blocks[0]; } + inline uint64_t *get_ptr() { return &m_blocks[0]; } + + inline uint32_t get_size_in_bytes() const { return get_total_blocks() * get_qwords_per_block() * sizeof(uint64_t); } + + inline const void *get_block_ptr(uint32_t block_x, uint32_t block_y, uint32_t element_index = 0) const + { + assert(block_x < m_blocks_x && block_y < m_blocks_y); + return &m_blocks[(block_x + block_y * m_blocks_x) * m_qwords_per_block + element_index]; + } + + inline void *get_block_ptr(uint32_t block_x, uint32_t block_y, uint32_t element_index = 0) + { + assert(block_x < m_blocks_x && block_y < m_blocks_y && element_index < m_qwords_per_block); + return &m_blocks[(block_x + block_y * m_blocks_x) * m_qwords_per_block + element_index]; + } + + void init(texture_format fmt, uint32_t width, uint32_t height) + { + m_fmt = fmt; + m_width = width; + m_height = height; + m_block_width = basisu::get_block_width(m_fmt); + m_block_height = basisu::get_block_height(m_fmt); + m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + m_qwords_per_block = basisu::get_qwords_per_block(m_fmt); + + m_blocks.resize(0); + m_blocks.resize(m_blocks_x * m_blocks_y 
* m_qwords_per_block); + } + + // Unpacks LDR textures only. Asserts and returns false otherwise. + // astc_srgb: true to use the ASTC sRGB decode profile, false for linear. + // For XUASTC LDR, this should match what was used during encoding. For ETC1S/UASTC LDR 4x4, this should be false. + bool unpack(image& img, bool astc_srgb) const; + + // Unpacks HDR textures only. Asserts and returns false otherwise. + bool unpack_hdr(imagef& img) const; + + inline void override_dimensions(uint32_t w, uint32_t h) + { + m_width = w; + m_height = h; + } + + private: + texture_format m_fmt; + uint32_t m_width, m_height, m_blocks_x, m_blocks_y, m_block_width, m_block_height, m_qwords_per_block; + uint64_vec m_blocks; + }; + + typedef basisu::vector gpu_image_vec; + + // KTX1 file writing - compatible with ARM's astcenc tool, and some other tools. + // Note astc_linear_flag used to be always effectively true in older code. It's ignored for ASTC HDR formats. + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag, bool astc_srgb_flag); + + bool does_dds_support_format(texture_format fmt); + bool write_dds_file(uint8_vec& dds_data, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format); + bool write_dds_file(const char* pFilename, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format); + + // Currently reads 2D 32bpp RGBA, 16-bit HALF RGBA, or 32-bit FLOAT RGBA, with or without mipmaps. No tex arrays or cubemaps, yet. 
+ bool read_uncompressed_dds_file(const char* pFilename, basisu::vector& ldr_mips, basisu::vector& hdr_mips); + + // Supports DDS and KTX + bool write_compressed_texture_file(const char *pFilename, const basisu::vector& g, bool cubemap_flag, bool use_srgb_format); + bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format); + bool write_compressed_texture_file(const char *pFilename, const gpu_image &g, bool use_srgb_format); + + bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); + + // GPU texture block unpacking + // For ETC1, use in basisu_etc.h: bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) + void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels); + bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha); + void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride); + bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels); + void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels); + +#if 0 + bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels); + int determine_bc7_mode(const void* pBlock); + int determine_bc7_mode_4_index_mode(const void* pBlock); + int determine_bc7_mode_4_or_5_rotation(const void* pBlock); + bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); // full format +#endif + + bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs = 4 * 3); // full format, outputs HALF values, RGB texels only (not RGBA) + void unpack_atc(const void* pBlock_bits, color_rgba* pPixels); + // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment. + bool unpack_fxt1(const void* p, color_rgba* pPixels); + // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC. 
+ bool unpack_pvrtc2(const void* p, color_rgba* pPixels); + void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c); + void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels); + + // unpack_block() is primarily intended to unpack texture data created by the transcoder. + // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not yet a complete implementation. + // Unpacks LDR texture formats only. + bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels, bool astc_srgb); + + // Unpacks HDR texture formats only. + bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels); + + bool read_astc_file(const uint8_t* pImage_data, size_t image_data_size, vector2D& blocks, uint32_t& block_width, uint32_t& block_height, uint32_t& width, uint32_t& height); + bool read_astc_file(const char* pFilename, vector2D& blocks, uint32_t& block_width, uint32_t& block_height, uint32_t& width, uint32_t& height); + bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y); + +} // namespace basisu + diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_kernels_declares.h b/Source/ThirdParty/basis_universal/encoder/basisu_kernels_declares.h new file mode 100644 index 000000000..9b85a594e --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_kernels_declares.h @@ -0,0 +1,27 @@ +// basisu_kernels_declares.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#if BASISU_SUPPORT_SSE +void CPPSPMD_NAME(perceptual_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); +void CPPSPMD_NAME(linear_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); + +void CPPSPMD_NAME(find_selectors_perceptual_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); +void CPPSPMD_NAME(find_selectors_linear_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); + +void CPPSPMD_NAME(find_lowest_error_perceptual_rgb_4_N)(int64_t* pDistance, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error); +void CPPSPMD_NAME(find_lowest_error_linear_rgb_4_N)(int64_t* pDistance, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error); + +void CPPSPMD_NAME(update_covar_matrix_16x16)(uint32_t num_vecs, const void* pWeighted_vecs, const void *pOrigin, const uint32_t* pVec_indices, void *pMatrix16x16); +#endif diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_kernels_imp.h b/Source/ThirdParty/basis_universal/encoder/basisu_kernels_imp.h new file mode 100644 index 000000000..b8addffb6 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_kernels_imp.h @@ -0,0 +1,652 @@ +// basisu_kernels_imp.h - Do not directly include +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using namespace CPPSPMD; + +namespace CPPSPMD_NAME(basisu_kernels_namespace) +{ + static inline int64_t reduce_add64(const vint &x) + { + return (int64_t)VINT_EXTRACT(x, 0) + (int64_t)VINT_EXTRACT(x, 1) + + (int64_t)VINT_EXTRACT(x, 2) + (int64_t)VINT_EXTRACT(x, 3); + } + struct perceptual_distance_rgb_4_N : spmd_kernel + { + void _call(int64_t* pDistance, + const uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + __m128i block_colors[4]; + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + block_colors[i] = load_rgba32(&pBlock_colors[i]); + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3]; + + vint base_r, base_g, base_b, base_a; + if ((s0 == s1) && (s0 == s2) && (s0 == s3)) + { + 
store_all(base_r, block_colors_r[s0]); + store_all(base_g, block_colors_g[s0]); + store_all(base_b, block_colors_b[s0]); + } + else + { + __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3]; + transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3); + } + + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 14 + dg * 45 + db * 5; + vint delta_cr = dr * 64 - delta_l; + vint delta_cb = db * 64 - delta_l; + + vint id = ((delta_l * delta_l) >> 5) + + ((((delta_cr * delta_cr) >> 5) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 5) * 3) >> 7); + + *pDistance += reduce_add64(id); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int sel = pSelectors[i]; + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 14 + dg * 45 + db * 5; + int delta_cr = dr * 64 - delta_l; + int delta_cb = db * 64 - delta_l; + + int id = ((delta_l * delta_l) >> 5) + + ((((delta_cr * delta_cr) >> 5) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 5) * 3) >> 7); + + *pDistance += id; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct linear_distance_rgb_4_N : spmd_kernel + { + void _call(int64_t* pDistance, + const uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + __m128i block_colors[4]; + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + block_colors[i] = load_rgba32(&pBlock_colors[i]); + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], 
(int)pBlock_colors[i].b); + } + + uint32_t i; + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3]; + + vint base_r, base_g, base_b, base_a; + if ((s0 == s1) && (s0 == s2) && (s0 == s3)) + { + store_all(base_r, block_colors_r[s0]); + store_all(base_g, block_colors_g[s0]); + store_all(base_b, block_colors_b[s0]); + } + else + { + __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3]; + transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3); + } + + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + + *pDistance += reduce_add64(id); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int sel = pSelectors[i]; + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + + *pDistance += id; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_selectors_perceptual_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 14 + dg * 45 + db * 5; + vint delta_cr = dr * 64 - delta_l; + vint delta_cb = db * 64 - delta_l; + + vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 5) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr 
* delta_cr, 5) * 26, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 5) * 3, 7); + + return id; + } + + void _call(int64_t* pDistance, + uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); + + __m128i vsels = shuffle_epi8(sels.m_value, shuf); + storeu_si32((void *)(pSelectors + i), vsels); + + *pDistance += reduce_add64(min_dist); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX, best_sel = 0; + for (int sel = 0; sel < 4; 
sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 14 + dg * 45 + db * 5; + int delta_cr = dr * 64 - delta_l; + int delta_cb = db * 64 - delta_l; + + int id = ((delta_l * delta_l) >> 5) + + ((((delta_cr * delta_cr) >> 5) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 5) * 3) >> 7); + if (id < best_err) + { + best_err = id; + best_sel = sel; + } + } + + pSelectors[i] = (uint8_t)best_sel; + + *pDistance += best_err; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_selectors_linear_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + return id; + } + + void _call(int64_t* pDistance, + uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + 
vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); + + __m128i vsels = shuffle_epi8(sels.m_value, shuf); + storeu_si32((void *)(pSelectors + i), vsels); + + *pDistance += reduce_add64(min_dist); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX, best_sel = 0; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + if (id < best_err) + { + best_err = id; + best_sel = sel; + } + } + + pSelectors[i] = (uint8_t)best_sel; + + *pDistance += best_err; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_lowest_error_perceptual_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 14 + dg * 45 + db * 5; + vint delta_cr = dr * 64 - delta_l; + vint delta_cb = db * 64 - delta_l; + + vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 5) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr * delta_cr, 5) * 26, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 5) * 3, 7); + + return id; + } + + void _call(int64_t* pDistance, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t 
early_out_error) + { + assert(early_out_error >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + *pDistance += reduce_add64(min_dist); + if (*pDistance > early_out_error) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 14 + dg * 45 + db * 5; + int delta_cr = dr * 64 - delta_l; + int delta_cb = db * 64 - delta_l; + + int id = ((delta_l * delta_l) >> 5) + + ((((delta_cr * delta_cr) >> 5) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 5) * 3) >> 7); + + if (id < best_err) + { + best_err = id; + } + } + + *pDistance += best_err; + if (*pDistance > early_out_error) + return; + } + } + }; + + struct find_lowest_error_linear_rgb_4_N : spmd_kernel + 
{ + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + + return id; + } + + void _call(int64_t* pDistance, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_error) + { + assert(early_out_error >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + *pDistance += reduce_add64(min_dist); + if (*pDistance > early_out_error) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + + if 
(id < best_err) + { + best_err = id; + } + } + + *pDistance += best_err; + if (*pDistance > early_out_error) + return; + } + } + }; + + struct update_covar_matrix_16x16 : spmd_kernel + { + void _call( + uint32_t num_vecs, const void* pWeighted_vecs_void, const void* pOrigin_void, const uint32_t* pVec_indices, void* pMatrix16x16_void) + { + const std::pair* pWeighted_vecs = static_cast< const std::pair *>(pWeighted_vecs_void); + + const float* pOrigin = static_cast(pOrigin_void); + vfloat org0 = loadu_linear_all(pOrigin), org1 = loadu_linear_all(pOrigin + 4), org2 = loadu_linear_all(pOrigin + 8), org3 = loadu_linear_all(pOrigin + 12); + + vfloat mat[16][4]; + vfloat vzero(zero_vfloat()); + + for (uint32_t i = 0; i < 16; i++) + { + store_all(mat[i][0], vzero); + store_all(mat[i][1], vzero); + store_all(mat[i][2], vzero); + store_all(mat[i][3], vzero); + } + + for (uint32_t k = 0; k < num_vecs; k++) + { + const uint32_t vec_index = pVec_indices[k]; + + const float* pW = pWeighted_vecs[vec_index].first.get_ptr(); + vfloat weight((float)pWeighted_vecs[vec_index].second); + + vfloat vec[4] = { loadu_linear_all(pW) - org0, loadu_linear_all(pW + 4) - org1, loadu_linear_all(pW + 8) - org2, loadu_linear_all(pW + 12) - org3 }; + + vfloat wvec0 = vec[0] * weight, wvec1 = vec[1] * weight, wvec2 = vec[2] * weight, wvec3 = vec[3] * weight; + + for (uint32_t j = 0; j < 16; j++) + { + vfloat vx = ((const float*)vec)[j]; + + store_all(mat[j][0], mat[j][0] + vx * wvec0); + store_all(mat[j][1], mat[j][1] + vx * wvec1); + store_all(mat[j][2], mat[j][2] + vx * wvec2); + store_all(mat[j][3], mat[j][3] + vx * wvec3); + + } // j + + } // k + + float* pMatrix = static_cast(pMatrix16x16_void); + + float* pDst = pMatrix; + for (uint32_t i = 0; i < 16; i++) + { + storeu_linear_all(pDst, mat[i][0]); + storeu_linear_all(pDst + 4, mat[i][1]); + storeu_linear_all(pDst + 8, mat[i][2]); + storeu_linear_all(pDst + 12, mat[i][3]); + pDst += 16; + } + } + }; + +} // namespace + +using namespace 
CPPSPMD_NAME(basisu_kernels_namespace); + +void CPPSPMD_NAME(perceptual_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< perceptual_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(linear_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< linear_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_selectors_perceptual_rgb_4_N)(int64_t *pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< find_selectors_perceptual_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_selectors_linear_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< find_selectors_linear_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_lowest_error_perceptual_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) +{ + spmd_call< find_lowest_error_perceptual_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); +} + +void CPPSPMD_NAME(find_lowest_error_linear_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) +{ + spmd_call< find_lowest_error_linear_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); +} + +void CPPSPMD_NAME(update_covar_matrix_16x16)(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, 
const uint32_t *pVec_indices, void* pMatrix16x16) +{ + spmd_call < update_covar_matrix_16x16 >(num_vecs, pWeighted_vecs, pOrigin, pVec_indices, pMatrix16x16); +} diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_math.h b/Source/ThirdParty/basis_universal/encoder/basisu_math.h new file mode 100644 index 000000000..5e8e8079c --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_math.h @@ -0,0 +1,3253 @@ +// File: basisu_math.h +#pragma once + +// TODO: Would prefer this in the basisu namespace, but to avoid collisions with the existing vec/matrix classes I'm placing this in "bu_math". +namespace bu_math +{ + // Cross-platform 1.0f/sqrtf(x) approximation. See https://en.wikipedia.org/wiki/Fast_inverse_square_root#cite_note-37. + // Would prefer using SSE1 etc. but that would require implementing multiple versions and platform divergence (needing more testing). + BASISU_FORCE_INLINE float inv_sqrt(float v) + { + union + { + float flt; + uint32_t ui; + } un; + + un.flt = v; + un.ui = 0x5F1FFFF9UL - (un.ui >> 1); + + return 0.703952253f * un.flt * (2.38924456f - v * (un.flt * un.flt)); + } + + inline float linstep(float edge0, float edge1, float x) + { + assert(edge1 != edge0); + + // Scale, and clamp x to 0..1 range + x = basisu::saturate((x - edge0) / (edge1 - edge0)); + + return x; + } + + inline float smoothstep(float edge0, float edge1, float x) + { + assert(edge1 != edge0); + + // Scale, and clamp x to 0..1 range + x = basisu::saturate((x - edge0) / (edge1 - edge0)); + + return x * x * (3.0f - 2.0f * x); + } + + template + class vec : public basisu::rel_ops > + { + public: + typedef T scalar_type; + enum + { + num_elements = N + }; + + inline vec() + { + } + + inline vec(basisu::eClear) + { + clear(); + } + + inline vec(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = other.m_s[i]; + } + + template + inline vec(const vec& other) + { + set(other); + } + + template + inline vec(const vec& other, T w) + { + *this = other; 
+ m_s[N - 1] = w; + } + + template + inline explicit vec(Args... args) + { + // static_assert(sizeof...(args) <= N); + set(args...); + } + + inline void clear() + { + if (N > 4) + memset(m_s, 0, sizeof(m_s)); + else + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = 0; + } + } + + template + inline vec& set(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + const uint32_t m = basisu::minimum(N, ON); + uint32_t i; + for (i = 0; i < m; i++) + m_s[i] = static_cast(other[i]); + for (; i < N; i++) + m_s[i] = 0; + return *this; + } + + inline vec& set_component(uint32_t index, T val) + { + assert(index < N); + m_s[index] = val; + return *this; + } + + inline vec& set_all(T val) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = val; + return *this; + } + + template + inline vec& set(Args... args) + { + // static_assert(sizeof...(args) <= N); + + // Initialize using parameter pack expansion + T values[] = { static_cast(args)... }; + + // Special case if setting with a scalar + if (sizeof...(args) == 1) + { + set_all(values[0]); + } + else + { + // Copy the values into the vector + for (std::size_t i = 0; i < sizeof...(args); ++i) + { + m_s[i] = values[i]; + } + + // Zero-initialize the remaining elements (if any) + if (sizeof...(args) < N) + { + std::fill(m_s + sizeof...(args), m_s + N, T{}); + } + } + + return *this; + } + + inline vec& set(const T* pValues) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = pValues[i]; + return *this; + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i) + { + return set(static_cast(other[i])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j) + { + return set(static_cast(other[i]), static_cast(other[j])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, 
uint32_t j, uint32_t k, uint32_t l) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); + } + + inline vec& operator=(const vec& rhs) + { + if (this != &rhs) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = rhs.m_s[i]; + } + return *this; + } + + template + inline vec& operator=(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + + uint32_t s = basisu::minimum(N, O); + + uint32_t i; + for (i = 0; i < s; i++) + m_s[i] = static_cast(other[i]); + + for (; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline bool operator==(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + if (!(m_s[i] == rhs.m_s[i])) + return false; + return true; + } + + inline bool operator<(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + { + if (m_s[i] < rhs.m_s[i]) + return true; + else if (!(m_s[i] == rhs.m_s[i])) + return false; + } + + return false; + } + + inline T operator[](uint32_t i) const + { + assert(i < N); + return m_s[i]; + } + + inline T& operator[](uint32_t i) + { + assert(i < N); + return m_s[i]; + } + + template + inline uint64_t get_component_bits_as_uint() const + { + // static_assert(index < N); + // static_assert((sizeof(T) == sizeof(uint16_t)) || (sizeof(T) == sizeof(uint32_t)) || (sizeof(T) == sizeof(uint64_t)), "Unsupported type"); + + if (sizeof(T) == sizeof(uint16_t)) + return *reinterpret_cast(&m_s[index]); + else if (sizeof(T) == sizeof(uint32_t)) + return *reinterpret_cast(&m_s[index]); + else if (sizeof(T) == sizeof(uint64_t)) + return *reinterpret_cast(&m_s[index]); + else + { + assert(0); + return 0; + } + } + + inline T get_x(void) const + { + return m_s[0]; + } + inline T get_y(void) const + { + // static_assert(N >= 2); + return m_s[1]; + } + inline T get_z(void) const + { + // static_assert(N >= 3); + return m_s[2]; + } + inline T get_w(void) const + { + // static_assert(N >= 4); + return m_s[3]; + } + + inline vec get_x_vector() const + { + 
return broadcast<0>(); + } + inline vec get_y_vector() const + { + return broadcast<1>(); + } + inline vec get_z_vector() const + { + return broadcast<2>(); + } + inline vec get_w_vector() const + { + return broadcast<3>(); + } + + inline T get_component(uint32_t i) const + { + return (*this)[i]; + } + + inline vec& set_x(T v) + { + m_s[0] = v; + return *this; + } + inline vec& set_y(T v) + { + // static_assert(N >= 2); + m_s[1] = v; + return *this; + } + inline vec& set_z(T v) + { + // static_assert(N >= 3); + m_s[2] = v; + return *this; + } + inline vec& set_w(T v) + { + // static_assert(N >= 4); + m_s[3] = v; + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_s[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_s[0]); + } + + inline vec as_point() const + { + vec result(*this); + result[N - 1] = 1; + return result; + } + + inline vec as_dir() const + { + vec result(*this); + result[N - 1] = 0; + return result; + } + + inline vec<2, T> select2(uint32_t i, uint32_t j) const + { + assert((i < N) && (j < N)); + return vec<2, T>(m_s[i], m_s[j]); + } + + inline vec<3, T> select3(uint32_t i, uint32_t j, uint32_t k) const + { + assert((i < N) && (j < N) && (k < N)); + return vec<3, T>(m_s[i], m_s[j], m_s[k]); + } + + inline vec<4, T> select4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + assert((i < N) && (j < N) && (k < N) && (l < N)); + return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); + } + + inline bool is_dir() const + { + return m_s[N - 1] == 0; + } + inline bool is_vector() const + { + return is_dir(); + } + inline bool is_point() const + { + return m_s[N - 1] == 1; + } + + inline vec project() const + { + vec result(*this); + if (result[N - 1]) + result /= result[N - 1]; + return result; + } + + inline vec broadcast(unsigned i) const + { + return vec((*this)[i]); + } + + template + inline vec broadcast() const + { + return vec((*this)[i]); + } + + inline vec swizzle(uint32_t i, uint32_t j) const + { 
+ return vec((*this)[i], (*this)[j]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k) const + { + return vec((*this)[i], (*this)[j], (*this)[k]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); + } + + inline vec operator-() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = -m_s[i]; + return result; + } + + inline vec operator+() const + { + return *this; + } + + inline vec& operator+=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] += other.m_s[i]; + return *this; + } + + inline vec& operator-=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] -= other.m_s[i]; + return *this; + } + + inline vec& operator*=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= other.m_s[i]; + return *this; + } + + inline vec& operator/=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= other.m_s[i]; + return *this; + } + + inline vec& operator*=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= s; + return *this; + } + + inline vec& operator/=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= s; + return *this; + } + + friend inline vec operator*(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] * val; + return result; + } + + friend inline vec operator*(T val, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = val * rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / val; + return result; + } + + friend inline vec operator+(const vec& lhs, const vec& rhs) + { + vec result; + for 
(uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; + return result; + } + + friend inline vec operator-(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; + return result; + } + + static inline vec<3, T> cross2(const vec& a, const vec& b) + { + // static_assert(N >= 2); + return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross2(const vec& b) const + { + return cross2(*this, b); + } + + static inline vec<3, T> cross3(const vec& a, const vec& b) + { + // static_assert(N >= 3); + return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross3(const vec& b) const + { + return cross3(*this, b); + } + + static inline vec<3, T> cross(const vec& a, const vec& b) + { + // static_assert(N >= 2); + + if (N == 2) + return cross2(a, b); + else + return cross3(a, b); + } + + inline vec<3, T> cross(const vec& b) const + { + // static_assert(N >= 2); + return cross(*this, b); + } + + inline T dot(const vec& rhs) const + { + return dot(*this, rhs); + } + + inline vec dot_vector(const vec& rhs) const + { + return vec(dot(*this, rhs)); + } + + static inline T dot(const vec& lhs, const vec& rhs) + { + T result = lhs.m_s[0] * rhs.m_s[0]; + for (uint32_t i = 1; i < N; i++) + result += lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + inline T dot2(const vec& rhs) const + { + // static_assert(N >= 2); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; + } + + inline T dot3(const vec& rhs) const + { + // static_assert(N >= 3); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; + } + + inline T dot4(const vec& rhs) const + { + // static_assert(N >= 4); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2] + m_s[3] * rhs.m_s[3]; + } + + inline T norm(void) const + { + T sum = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + sum += m_s[i] * m_s[i]; + return 
sum; + } + + inline T length(void) const + { + return sqrt(norm()); + } + + inline T squared_distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return dist2; + } + + inline T squared_distance(const vec& rhs, T early_out) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + if (dist2 > early_out) + break; + } + return dist2; + } + + inline T distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return sqrt(dist2); + } + + inline vec inverse() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = m_s[i] ? (1.0f / m_s[i]) : 0; + return result; + } + + // returns squared length (norm) + inline double normalize(const vec* pDefaultVec = NULL) + { + double n = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + n += m_s[i] * m_s[i]; + + if (n != 0) + *this *= static_cast(1.0f / sqrt(n)); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline double normalize3(const vec* pDefaultVec = NULL) + { + // static_assert(N >= 3); + + double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; + + if (n != 0) + *this *= static_cast((1.0f / sqrt(n))); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + { + normalize(pDefaultVec); + return *this; + } + + inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + { + normalize3(pDefaultVec); + return *this; + } + + inline vec get_normalized(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize(pDefaultVec); + return result; + } + + inline vec get_normalized3(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize3(pDefaultVec); + return result; + } + + inline vec& clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + 
m_s[i] = static_cast(basisu::clamp(m_s[i], l, h)); + return *this; + } + + inline vec& saturate() + { + return clamp(0.0f, 1.0f); + } + + inline vec& clamp(const vec& l, const vec& h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(basisu::clamp(m_s[i], l[i], h[i])); + return *this; + } + + inline bool is_within_bounds(const vec& l, const vec& h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l[i]) || (m_s[i] > h[i])) + return false; + + return true; + } + + inline bool is_within_bounds(T l, T h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l) || (m_s[i] > h)) + return false; + + return true; + } + + inline uint32_t get_major_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c > m) + { + m = c; + r = i; + } + } + return r; + } + + inline uint32_t get_minor_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c < m) + { + m = c; + r = i; + } + } + return r; + } + + inline void get_projection_axes(uint32_t& u, uint32_t& v) const + { + const int axis = get_major_axis(); + if (m_s[axis] < 0.0f) + { + v = basisu::next_wrap(axis, N); + u = basisu::next_wrap(v, N); + } + else + { + u = basisu::next_wrap(axis, N); + v = basisu::next_wrap(u, N); + } + } + + inline T get_absolute_minimum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = basisu::minimum(result, fabs(m_s[i])); + return result; + } + + inline T get_absolute_maximum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = basisu::maximum(result, fabs(m_s[i])); + return result; + } + + inline T get_minimum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = basisu::minimum(result, m_s[i]); + return result; + } + + inline T get_maximum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + 
result = basisu::maximum(result, m_s[i]); + return result; + } + + inline vec& remove_unit_direction(const vec& dir) + { + *this -= (dot(dir) * dir); + return *this; + } + + inline vec get_remove_unit_direction(const vec& dir) const + { + return *this - (dot(dir) * dir); + } + + inline bool all_less(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] >= b.m_s[i]) + return false; + return true; + } + + inline bool all_less_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] > b.m_s[i]) + return false; + return true; + } + + inline bool all_greater(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] <= b.m_s[i]) + return false; + return true; + } + + inline bool all_greater_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] < b.m_s[i]) + return false; + return true; + } + + inline vec negate_xyz() const + { + vec ret; + + ret[0] = -m_s[0]; + if (N >= 2) + ret[1] = -m_s[1]; + if (N >= 3) + ret[2] = -m_s[2]; + + for (uint32_t i = 3; i < N; i++) + ret[i] = m_s[i]; + + return ret; + } + + inline vec& invert() + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] != 0.0f) + m_s[i] = 1.0f / m_s[i]; + return *this; + } + + inline scalar_type perp_dot(const vec& b) const + { + // static_assert(N == 2); + return m_s[0] * b.m_s[1] - m_s[1] * b.m_s[0]; + } + + inline vec perp() const + { + // static_assert(N == 2); + return vec(-m_s[1], m_s[0]); + } + + inline vec get_floor() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = floor(m_s[i]); + return result; + } + + inline vec get_ceil() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = ceil(m_s[i]); + return result; + } + + inline T get_total() const + { + T res = m_s[0]; + for (uint32_t i = 1; i < N; i++) + res += m_s[i]; + return res; + } + + // static helper methods + + static inline vec mul_components(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + 
result[i] = lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + static inline vec mul_add_components(const vec& a, const vec& b, const vec& c) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = a.m_s[i] * b.m_s[i] + c.m_s[i]; + return result; + } + + static inline vec make_axis(uint32_t i) + { + vec result; + result.clear(); + result[i] = 1; + return result; + } + + static inline vec equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] == b[i]); + return ret; + } + + static inline vec not_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] != b[i]); + return ret; + } + + static inline vec less_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] < b[i]); + return ret; + } + + static inline vec less_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] <= b[i]); + return ret; + } + + static inline vec greater_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] >= b[i]); + return ret; + } + + static inline vec greater_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] > b[i]); + return ret; + } + + static inline vec component_max(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = basisu::maximum(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec component_min(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = basisu::minimum(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec lerp(const vec& a, const vec& b, float t) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; + return ret; + } + + static inline bool equal_tol(const vec& a, const vec& b, float t) + { + for (uint32_t i = 0; i < N; i++) + 
if (!basisu::equal_tol(a.m_s[i], b.m_s[i], t)) + return false; + return true; + } + + inline bool equal_tol(const vec& b, float t) const + { + return equal_tol(*this, b, t); + } + + static inline vec make_random(basisu::rand& r, float l, float h) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = r.frand(l, h); + return result; + } + + static inline vec make_random(basisu::rand& r, const vec& l, const vec& h) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = r.frand(l[i], h[i]); + return result; + } + + void print() const + { + for (uint32_t c = 0; c < N; c++) + printf("%3.3f ", (*this)[c]); + printf("\n"); + } + + protected: + T m_s[N]; + }; + + typedef vec<1, double> vec1D; + typedef vec<2, double> vec2D; + typedef vec<3, double> vec3D; + typedef vec<4, double> vec4D; + + typedef vec<1, float> vec1F; + + typedef vec<2, float> vec2F; + typedef basisu::vector vec2F_array; + + typedef vec<3, float> vec3F; + typedef basisu::vector vec3F_array; + + typedef vec<4, float> vec4F; + typedef basisu::vector vec4F_array; + + typedef vec<2, uint32_t> vec2U; + typedef vec<3, uint32_t> vec3U; + typedef vec<2, int> vec2I; + typedef vec<3, int> vec3I; + typedef vec<4, int> vec4I; + + typedef vec<2, int16_t> vec2I16; + typedef vec<3, int16_t> vec3I16; + + inline vec2F rotate_point_2D(const vec2F& p, float rad) + { + float c = cosf(rad); + float s = sinf(rad); + + float x = p[0]; + float y = p[1]; + + return vec2F(x * c - y * s, x * s + y * c); + } + + //-------------------------------------------------------------- + + // Matrix/vector cheat sheet, because confusingly, depending on how matrices are stored in memory people can use opposite definitions of "rows", "cols", etc. + // See http://www.mindcontrol.org/~hplus/graphics/matrix-layout.html + // + // So in this simple row-major general matrix class: + // matrix=[NumRows][NumCols] or [R][C], i.e. a 3x3 matrix stored in memory will appear as: R0C0, R0C1, R0C2, R1C0, R1C1, R1C2, etc. 
+ // Matrix multiplication: [R0,C0]*[R1,C1]=[R0,C1], C0 must equal R1 + // + // In this class: + // A "row vector" type is a vector of size # of matrix cols, 1xC. It's the vector type that is used to store the matrix rows. + // A "col vector" type is a vector of size # of matrix rows, Rx1. It's a vector type large enough to hold each matrix column. + // + // Subrow/col vectors: last component is assumed to be either 0 (a "vector") or 1 (a "point") + // "subrow vector": vector/point of size # cols-1, 1x(C-1) + // "subcol vector": vector/point of size # rows-1, (R-1)x1 + // + // D3D style: + // vec*matrix, row vector on left (vec dotted against columns) + // [1,4]*[4,4]=[1,4] + // abcd * A B C D + // A B C D + // A B C D + // A B C D + // = e f g h + // + // Now confusingly, in the matrix transform method for vec*matrix below the vector's type is "col_vec", because col_vec will have the proper size for non-square matrices. But the vector on the left is written as row vector, argh. + // + // + // OGL style: + // matrix*vec, col vector on right (vec dotted against rows): + // [4,4]*[4,1]=[4,1] + // + // A B C D * e = e + // A B C D f f + // A B C D g g + // A B C D h h + + template + Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) + { + // static_assert(Z::num_rows == X::num_rows); + // static_assert(Z::num_cols == Y::num_cols); + // static_assert(X::num_cols == Y::num_rows); + assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); + for (uint32_t r = 0; r < X::num_rows; r++) + for (uint32_t c = 0; c < Y::num_cols; c++) + { + typename Z::scalar_type s = lhs(r, 0) * rhs(0, c); + for (uint32_t i = 1; i < X::num_cols; i++) + s += lhs(r, i) * rhs(i, c); + result(r, c) = s; + } + return result; + } + + template + Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) + { + // static_assert(Z::num_rows == X::num_cols); + // static_assert(Z::num_cols == Y::num_cols); + // static_assert(X::num_rows == Y::num_rows); + 
assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); + for (uint32_t r = 0; r < X::num_cols; r++) + for (uint32_t c = 0; c < Y::num_cols; c++) + { + typename Z::scalar_type s = lhs(0, r) * rhs(0, c); + for (uint32_t i = 1; i < X::num_rows; i++) + s += lhs(i, r) * rhs(i, c); + result(r, c) = s; + } + return result; + } + + template + Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) + { + // static_assert(Z::num_rows == X::num_rows); + // static_assert(Z::num_cols == Y::num_rows); + // static_assert(X::num_cols == Y::num_cols); + assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); + for (uint32_t r = 0; r < X::num_rows; r++) + for (uint32_t c = 0; c < Y::num_rows; c++) + { + typename Z::scalar_type s = lhs(r, 0) * rhs(c, 0); + for (uint32_t i = 1; i < X::num_cols; i++) + s += lhs(r, i) * rhs(c, i); + result(r, c) = s; + } + return result; + } + + template + class matrix + { + public: + typedef T scalar_type; + static const uint32_t num_rows = R; + static const uint32_t num_cols = C; +#if 0 + enum + { + num_rows = R, + num_cols = C + }; +#endif + + typedef vec col_vec; + typedef vec < (R > 1) ? (R - 1) : 0, T > subcol_vec; + + typedef vec row_vec; + typedef vec < (C > 1) ? 
(C - 1) : 0, T > subrow_vec; + + inline matrix() + { + } + + inline matrix(basisu::eClear) + { + clear(); + } + + inline matrix(basisu::eIdentity) + { + set_identity_matrix(); + } + + inline matrix(const T* p) + { + set(p); + } + + inline matrix(const matrix& other) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] = other.m_rows[i]; + } + + inline matrix& operator=(const matrix& rhs) + { + if (this != &rhs) + for (uint32_t i = 0; i < R; i++) + m_rows[i] = rhs.m_rows[i]; + return *this; + } + + inline matrix(T val00, T val01, + T val10, T val11) + { + set(val00, val01, val10, val11); + } + + inline matrix(T val00, T val01, + T val10, T val11, + T val20, T val21) + { + set(val00, val01, val10, val11, val20, val21); + } + + inline matrix(T val00, T val01, T val02, + T val10, T val11, T val12, + T val20, T val21, T val22) + { + set(val00, val01, val02, val10, val11, val12, val20, val21, val22); + } + + inline matrix(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23, + T val30, T val31, T val32, T val33) + { + set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23, val30, val31, val32, val33); + } + + inline matrix(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23) + { + set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23); + } + + inline void set(const float* p) + { + for (uint32_t i = 0; i < R; i++) + { + m_rows[i].set(p); + p += C; + } + } + + inline void set(T val00, T val01, + T val10, T val11) + { + m_rows[0].set(val00, val01); + if (R >= 2) + { + m_rows[1].set(val10, val11); + + for (uint32_t i = 2; i < R; i++) + m_rows[i].clear(); + } + } + + inline void set(T val00, T val01, + T val10, T val11, + T val20, T val21) + { + m_rows[0].set(val00, val01); + if (R >= 2) + { + m_rows[1].set(val10, val11); + + if (R >= 3) + { + m_rows[2].set(val20, val21); + + for (uint32_t i = 3; i 
< R; i++) + m_rows[i].clear(); + } + } + } + + inline void set(T val00, T val01, T val02, + T val10, T val11, T val12, + T val20, T val21, T val22) + { + m_rows[0].set(val00, val01, val02); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12); + if (R >= 3) + { + m_rows[2].set(val20, val21, val22); + + for (uint32_t i = 3; i < R; i++) + m_rows[i].clear(); + } + } + } + + inline void set(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23, + T val30, T val31, T val32, T val33) + { + m_rows[0].set(val00, val01, val02, val03); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12, val13); + if (R >= 3) + { + m_rows[2].set(val20, val21, val22, val23); + + if (R >= 4) + { + m_rows[3].set(val30, val31, val32, val33); + + for (uint32_t i = 4; i < R; i++) + m_rows[i].clear(); + } + } + } + } + + inline void set(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23) + { + m_rows[0].set(val00, val01, val02, val03); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12, val13); + if (R >= 3) + { + m_rows[2].set(val20, val21, val22, val23); + + for (uint32_t i = 3; i < R; i++) + m_rows[i].clear(); + } + } + } + + inline uint32_t get_num_rows() const + { + return num_rows; + } + + inline uint32_t get_num_cols() const + { + return num_cols; + } + + inline uint32_t get_total_elements() const + { + return num_rows * num_cols; + } + + inline T operator()(uint32_t r, uint32_t c) const + { + assert((r < R) && (c < C)); + return m_rows[r][c]; + } + + inline T& operator()(uint32_t r, uint32_t c) + { + assert((r < R) && (c < C)); + return m_rows[r][c]; + } + + inline const row_vec& operator[](uint32_t r) const + { + assert(r < R); + return m_rows[r]; + } + + inline row_vec& operator[](uint32_t r) + { + assert(r < R); + return m_rows[r]; + } + + inline const row_vec& get_row(uint32_t r) const + { + return (*this)[r]; + } + + inline row_vec& get_row(uint32_t r) + { + 
return (*this)[r]; + } + + inline void set_row(uint32_t r, const row_vec& v) + { + (*this)[r] = v; + } + + inline col_vec get_col(uint32_t c) const + { + assert(c < C); + col_vec result; + for (uint32_t i = 0; i < R; i++) + result[i] = m_rows[i][c]; + return result; + } + + inline void set_col(uint32_t c, const col_vec& col) + { + assert(c < C); + for (uint32_t i = 0; i < R; i++) + m_rows[i][c] = col[i]; + } + + inline void set_col(uint32_t c, const subcol_vec& col) + { + assert(c < C); + for (uint32_t i = 0; i < (R - 1); i++) + m_rows[i][c] = col[i]; + + m_rows[R - 1][c] = 0.0f; + } + + inline const row_vec& get_translate() const + { + return m_rows[R - 1]; + } + + inline matrix& set_translate(const row_vec& r) + { + m_rows[R - 1] = r; + return *this; + } + + inline matrix& set_translate(const subrow_vec& r) + { + m_rows[R - 1] = row_vec(r).as_point(); + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_rows[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_rows[0]); + } + + inline matrix& operator+=(const matrix& other) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] += other.m_rows[i]; + return *this; + } + + inline matrix& operator-=(const matrix& other) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] -= other.m_rows[i]; + return *this; + } + + inline matrix& operator*=(T val) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] *= val; + return *this; + } + + inline matrix& operator/=(T val) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] /= val; + return *this; + } + + inline matrix& operator*=(const matrix& other) + { + matrix result; + matrix_mul_helper(result, *this, other); + *this = result; + return *this; + } + + friend inline matrix operator+(const matrix& lhs, const matrix& rhs) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] + rhs.m_rows[i]; + return result; + } + + friend inline matrix operator-(const matrix& lhs, const matrix& rhs) + { + matrix result; + 
for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] - rhs.m_rows[i]; + return result; + } + + friend inline matrix operator*(const matrix& lhs, T val) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] * val; + return result; + } + + friend inline matrix operator/(const matrix& lhs, T val) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] / val; + return result; + } + + friend inline matrix operator*(T val, const matrix& rhs) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = val * rhs.m_rows[i]; + return result; + } + +#if 0 + template + friend inline matrix operator*(const matrix& lhs, const matrix& rhs) + { + matrix result; + return matrix_mul_helper(result, lhs, rhs); + } +#endif + friend inline matrix operator*(const matrix& lhs, const matrix& rhs) + { + matrix result; + return matrix_mul_helper(result, lhs, rhs); + } + + friend inline row_vec operator*(const col_vec& a, const matrix& b) + { + return transform(a, b); + } + + inline matrix operator+() const + { + return *this; + } + + inline matrix operator-() const + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = -m_rows[i]; + return result; + } + + inline matrix& clear() + { + for (uint32_t i = 0; i < R; i++) + m_rows[i].clear(); + return *this; + } + + inline matrix& set_zero_matrix() + { + clear(); + return *this; + } + + inline matrix& set_identity_matrix() + { + for (uint32_t i = 0; i < R; i++) + { + m_rows[i].clear(); + m_rows[i][i] = 1.0f; + } + return *this; + } + + inline matrix& set_scale_matrix(float s) + { + clear(); + for (int i = 0; i < (R - 1); i++) + m_rows[i][i] = s; + m_rows[R - 1][C - 1] = 1.0f; + return *this; + } + + inline matrix& set_scale_matrix(const row_vec& s) + { + clear(); + for (uint32_t i = 0; i < R; i++) + m_rows[i][i] = s[i]; + return *this; + } + + inline matrix& set_scale_matrix(float x, float y) + { + set_identity_matrix(); + m_rows[0].set_x(x); + 
m_rows[1].set_y(y); + return *this; + } + + inline matrix& set_scale_matrix(float x, float y, float z) + { + set_identity_matrix(); + m_rows[0].set_x(x); + m_rows[1].set_y(y); + m_rows[2].set_z(z); + return *this; + } + + inline matrix& set_translate_matrix(const row_vec& s) + { + set_identity_matrix(); + set_translate(s); + return *this; + } + + inline matrix& set_translate_matrix(float x, float y) + { + set_identity_matrix(); + set_translate(row_vec(x, y).as_point()); + return *this; + } + + inline matrix& set_translate_matrix(float x, float y, float z) + { + set_identity_matrix(); + set_translate(row_vec(x, y, z).as_point()); + return *this; + } + + inline matrix get_transposed() const + { + // static_assert(R == C); + + matrix result; + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result.m_rows[i][j] = m_rows[j][i]; + return result; + } + + inline matrix get_transposed_nonsquare() const + { + matrix result; + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result[j][i] = m_rows[i][j]; + return result; + } + + inline matrix& transpose_in_place() + { + matrix result; + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result.m_rows[i][j] = m_rows[j][i]; + *this = result; + return *this; + } + + // Frobenius Norm + T get_norm() const + { + T result = 0; + + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result += m_rows[i][j] * m_rows[i][j]; + + return static_cast(sqrt(result)); + } + + inline matrix get_power(T p) const + { + matrix result; + + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result[i][j] = static_cast(pow(m_rows[i][j], p)); + + return result; + } + + inline matrix<1, R, T> numpy_dot(const matrix<1, C, T>& b) const + { + matrix<1, R, T> result; + + for (uint32_t r = 0; r < R; r++) + { + T sum = 0; + for (uint32_t c = 0; c < C; c++) + sum += m_rows[r][c] * b[0][c]; + + result[0][r] = static_cast(sum); + } + + return result; + } + + bool 
invert(matrix& result) const + { + // static_assert(R == C); + + result.set_identity_matrix(); + + matrix mat(*this); + + for (uint32_t c = 0; c < C; c++) + { + uint32_t max_r = c; + for (uint32_t r = c + 1; r < R; r++) + if (fabs(mat[r][c]) > fabs(mat[max_r][c])) + max_r = r; + + if (mat[max_r][c] == 0.0f) + { + result.set_identity_matrix(); + return false; + } + + std::swap(mat[c], mat[max_r]); + std::swap(result[c], result[max_r]); + + result[c] /= mat[c][c]; + mat[c] /= mat[c][c]; + + for (uint32_t row = 0; row < R; row++) + { + if (row != c) + { + const row_vec temp(mat[row][c]); + mat[row] -= row_vec::mul_components(mat[c], temp); + result[row] -= row_vec::mul_components(result[c], temp); + } + } + } + + return true; + } + + matrix& invert_in_place() + { + matrix result; + invert(result); + *this = result; + return *this; + } + + matrix get_inverse() const + { + matrix result; + invert(result); + return result; + } + + T get_det() const + { + // static_assert(R == C); + return det_helper(*this, R); + } + + bool equal_tol(const matrix& b, float tol) const + { + for (uint32_t r = 0; r < R; r++) + if (!row_vec::equal_tol(m_rows[r], b.m_rows[r], tol)) + return false; + return true; + } + + bool is_square() const + { + return R == C; + } + + double get_trace() const + { + // static_assert(is_square()); + + T total = 0; + for (uint32_t i = 0; i < R; i++) + total += (*this)(i, i); + + return total; + } + + void print() const + { + for (uint32_t r = 0; r < R; r++) + { + for (uint32_t c = 0; c < C; c++) + printf("%3.7f ", (*this)(r, c)); + printf("\n"); + } + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Confusingly, note that the data type is named "col_vec", but mathematically it's actually written as a row vector (of size equal to the # matrix rows, which is why it's called a "col_vec" in this class). + // 1xR * RxC = 1xC + // This dots against the matrix columns. 
+ static inline row_vec transform(const col_vec& a, const matrix& b) + { + row_vec result(b[0] * a[0]); + for (uint32_t r = 1; r < R; r++) + result += b[r] * a[r]; + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 1. + static inline row_vec transform_point(const col_vec& a, const matrix& b) + { + row_vec result(0); + for (int r = 0; r < (R - 1); r++) + result += b[r] * a[r]; + result += b[R - 1]; + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 0. + static inline row_vec transform_vector(const col_vec& a, const matrix& b) + { + row_vec result(0); + for (int r = 0; r < (R - 1); r++) + result += b[r] * a[r]; + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 1. + static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) + { + subcol_vec result(0); + for (int r = 0; r < static_cast(R); r++) + { + const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; + for (int c = 0; c < static_cast(C - 1); c++) + result[c] += b[r][c] * s; + } + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 0. + static inline subcol_vec transform_vector(const subcol_vec& a, const matrix& b) + { + subcol_vec result(0); + for (int r = 0; r < static_cast(R - 1); r++) + { + const T s = a[r]; + for (int c = 0; c < static_cast(C - 1); c++) + result[c] += b[r][c] * s; + } + return result; + } + + // Like transform() above, but the matrix is effectively transposed before the multiply. 
+ static inline col_vec transform_transposed(const col_vec& a, const matrix& b) + { + // static_assert(R == C); + col_vec result; + for (uint32_t r = 0; r < R; r++) + result[r] = b[r].dot(a); + return result; + } + + // Like transform() above, but the matrix is effectively transposed before the multiply. + // Last component of vec is assumed to be 0. + static inline col_vec transform_vector_transposed(const col_vec& a, const matrix& b) + { + // static_assert(R == C); + col_vec result; + for (uint32_t r = 0; r < R; r++) + { + T s = 0; + for (uint32_t c = 0; c < (C - 1); c++) + s += b[r][c] * a[c]; + + result[r] = s; + } + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left), but the matrix is effectively transposed before the multiply. + // Last component of vec is assumed to be 1. + static inline subcol_vec transform_point_transposed(const subcol_vec& a, const matrix& b) + { + // static_assert(R == C); + subcol_vec result(0); + for (int r = 0; r < R; r++) + { + const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; + for (int c = 0; c < (C - 1); c++) + result[c] += b[c][r] * s; + } + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left), but the matrix is effectively transposed before the multiply. + // Last component of vec is assumed to be 0. + static inline subcol_vec transform_vector_transposed(const subcol_vec& a, const matrix& b) + { + // static_assert(R == C); + subcol_vec result(0); + for (int r = 0; r < static_cast(R - 1); r++) + { + const T s = a[r]; + for (int c = 0; c < static_cast(C - 1); c++) + result[c] += b[c][r] * s; + } + return result; + } + + // This method transforms a matrix by a vector (OGL style, col vector on the right). + // Note that the data type is named "row_vec", but mathematically it's actually written as a column vector (of size equal to the # matrix cols). + // RxC * Cx1 = Rx1 + // This dots against the matrix rows. 
+ static inline col_vec transform(const matrix& b, const row_vec& a) + { + col_vec result; + for (int r = 0; r < static_cast(R); r++) + result[r] = b[r].dot(a); + return result; + } + + // This method transforms a matrix by a vector (OGL style, col vector on the right), except the matrix is effectively transposed before the multiply. + // Note that the data type is named "row_vec", but mathematically it's actually written as a column vector (of size equal to the # matrix cols). + // RxC * Cx1 = Rx1 + // This dots against the matrix cols. + static inline col_vec transform_transposed(const matrix& b, const row_vec& a) + { + // static_assert(R == C); + row_vec result(b[0] * a[0]); + for (int r = 1; r < static_cast(R); r++) + result += b[r] * a[r]; + return col_vec(result); + } + + static inline matrix& mul_components(matrix& result, const matrix& lhs, const matrix& rhs) + { + for (uint32_t r = 0; r < R; r++) + result[r] = row_vec::mul_components(lhs[r], rhs[r]); + return result; + } + + static inline matrix& concat(matrix& lhs, const matrix& rhs) + { + return matrix_mul_helper(lhs, matrix(lhs), rhs); + } + + inline matrix& concat_in_place(const matrix& rhs) + { + return concat(*this, rhs); + } + + static inline matrix& multiply(matrix& result, const matrix& lhs, const matrix& rhs) + { + matrix temp; + matrix* pResult = ((&result == &lhs) || (&result == &rhs)) ? 
&temp : &result; + + matrix_mul_helper(*pResult, lhs, rhs); + if (pResult != &result) + result = *pResult; + + return result; + } + + static matrix make_zero_matrix() + { + matrix result; + result.clear(); + return result; + } + + static matrix make_identity_matrix() + { + matrix result; + result.set_identity_matrix(); + return result; + } + + static matrix make_translate_matrix(const row_vec& t) + { + return matrix(basisu::cIdentity).set_translate(t); + } + + static matrix make_translate_matrix(float x, float y) + { + return matrix(basisu::cIdentity).set_translate_matrix(x, y); + } + + static matrix make_translate_matrix(float x, float y, float z) + { + return matrix(basisu::cIdentity).set_translate_matrix(x, y, z); + } + + static inline matrix make_scale_matrix(float s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(const row_vec& s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(float x, float y) + { + // static_assert(R >= 3 && C >= 3); + matrix result; + result.set_identity_matrix(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + return result; + } + + static inline matrix make_scale_matrix(float x, float y, float z) + { + // static_assert(R >= 4 && C >= 4); + matrix result; + result.set_identity_matrix(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + result.m_rows[2][2] = z; + return result; + } + + // Helpers derived from Graphics Gems 1 and 2 (Matrices and Transformations, Ronald N. 
Goldman) + static matrix make_rotate_matrix(const vec<3, T>& axis, T ang) + { + // static_assert(R >= 3 && C >= 3); + + vec<3, T> norm_axis(axis.get_normalized()); + + double cos_a = cos(ang); + double inv_cos_a = 1.0f - cos_a; + + double sin_a = sin(ang); + + const T x = norm_axis[0]; + const T y = norm_axis[1]; + const T z = norm_axis[2]; + + const double x2 = norm_axis[0] * norm_axis[0]; + const double y2 = norm_axis[1] * norm_axis[1]; + const double z2 = norm_axis[2] * norm_axis[2]; + + matrix result; + result.set_identity_matrix(); + + result[0][0] = (T)((inv_cos_a * x2) + cos_a); + result[1][0] = (T)((inv_cos_a * x * y) + (sin_a * z)); + result[2][0] = (T)((inv_cos_a * x * z) - (sin_a * y)); + + result[0][1] = (T)((inv_cos_a * x * y) - (sin_a * z)); + result[1][1] = (T)((inv_cos_a * y2) + cos_a); + result[2][1] = (T)((inv_cos_a * y * z) + (sin_a * x)); + + result[0][2] = (T)((inv_cos_a * x * z) + (sin_a * y)); + result[1][2] = (T)((inv_cos_a * y * z) - (sin_a * x)); + result[2][2] = (T)((inv_cos_a * z2) + cos_a); + + return result; + } + + static inline matrix make_rotate_matrix(T ang) + { + // static_assert(R >= 2 && C >= 2); + + matrix ret(basisu::cIdentity); + + const T sin_a = static_cast(sin(ang)); + const T cos_a = static_cast(cos(ang)); + + ret[0][0] = +cos_a; + ret[0][1] = -sin_a; + ret[1][0] = +sin_a; + ret[1][1] = +cos_a; + + return ret; + } + + static inline matrix make_rotate_matrix(uint32_t axis, T ang) + { + vec<3, T> axis_vec; + axis_vec.clear(); + axis_vec[axis] = 1.0f; + return make_rotate_matrix(axis_vec, ang); + } + + static inline matrix make_cross_product_matrix(const vec<3, scalar_type>& c) + { + // static_assert((num_rows >= 3) && (num_cols >= 3)); + matrix ret(basisu::cClear); + ret[0][1] = c[2]; + ret[0][2] = -c[1]; + ret[1][0] = -c[2]; + ret[1][2] = c[0]; + ret[2][0] = c[1]; + ret[2][1] = -c[0]; + return ret; + } + + static inline matrix make_reflection_matrix(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q) + { + // 
static_assert((num_rows == 4) && (num_cols == 4)); + matrix ret; + assert(n.is_vector() && q.is_vector()); + ret = make_identity_matrix() - 2.0f * make_tensor_product_matrix(n, n); + ret.set_translate((2.0f * q.dot(n) * n).as_point()); + return ret; + } + + static inline matrix make_tensor_product_matrix(const row_vec& v, const row_vec& w) + { + matrix ret; + for (uint32_t r = 0; r < num_rows; r++) + ret[r] = row_vec::mul_components(v.broadcast(r), w); + return ret; + } + + static inline matrix make_uniform_scaling_matrix(const vec<4, scalar_type>& q, scalar_type c) + { + // static_assert((num_rows == 4) && (num_cols == 4)); + assert(q.is_vector()); + matrix ret; + ret = c * make_identity_matrix(); + ret.set_translate(((1.0f - c) * q).as_point()); + return ret; + } + + static inline matrix make_nonuniform_scaling_matrix(const vec<4, scalar_type>& q, scalar_type c, const vec<4, scalar_type>& w) + { + // static_assert((num_rows == 4) && (num_cols == 4)); + assert(q.is_vector() && w.is_vector()); + matrix ret; + ret = make_identity_matrix() - (1.0f - c) * make_tensor_product_matrix(w, w); + ret.set_translate(((1.0f - c) * q.dot(w) * w).as_point()); + return ret; + } + + // n = normal of plane, q = point on plane + static inline matrix make_ortho_projection_matrix(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q) + { + assert(n.is_vector() && q.is_vector()); + matrix ret; + ret = make_identity_matrix() - make_tensor_product_matrix(n, n); + ret.set_translate((q.dot(n) * n).as_point()); + return ret; + } + + static inline matrix make_parallel_projection(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q, const vec<4, scalar_type>& w) + { + assert(n.is_vector() && q.is_vector() && w.is_vector()); + matrix ret; + ret = make_identity_matrix() - (make_tensor_product_matrix(n, w) / (w.dot(n))); + ret.set_translate(((q.dot(n) / w.dot(n)) * w).as_point()); + return ret; + } + + protected: + row_vec m_rows[R]; + + static T det_helper(const matrix& a, uint32_t 
n) + { + // Algorithm ported from Numerical Recipes in C. + T d; + matrix m; + if (n == 2) + d = a(0, 0) * a(1, 1) - a(1, 0) * a(0, 1); + else + { + d = 0; + for (uint32_t j1 = 1; j1 <= n; j1++) + { + for (uint32_t i = 2; i <= n; i++) + { + int j2 = 1; + for (uint32_t j = 1; j <= n; j++) + { + if (j != j1) + { + m(i - 2, j2 - 1) = a(i - 1, j - 1); + j2++; + } + } + } + d += (((1 + j1) & 1) ? -1.0f : 1.0f) * a(1 - 1, j1 - 1) * det_helper(m, n - 1); + } + } + return d; + } + }; + + typedef matrix<2, 2, float> matrix22F; + typedef matrix<2, 2, double> matrix22D; + + typedef matrix<3, 3, float> matrix33F; + typedef matrix<3, 3, double> matrix33D; + + typedef matrix<4, 4, float> matrix44F; + typedef matrix<4, 4, double> matrix44D; + + typedef matrix<8, 8, float> matrix88F; + + // These helpers create good old D3D-style matrices. + inline matrix44F matrix44F_make_perspective_offcenter_lh(float l, float r, float b, float t, float nz, float fz) + { + float two_nz = 2.0f * nz; + float one_over_width = 1.0f / (r - l); + float one_over_height = 1.0f / (t - b); + + matrix44F view_to_proj; + view_to_proj[0].set(two_nz * one_over_width, 0.0f, 0.0f, 0.0f); + view_to_proj[1].set(0.0f, two_nz * one_over_height, 0.0f, 0.0f); + view_to_proj[2].set(-(l + r) * one_over_width, -(t + b) * one_over_height, fz / (fz - nz), 1.0f); + view_to_proj[3].set(0.0f, 0.0f, -view_to_proj[2][2] * nz, 0.0f); + return view_to_proj; + } + + // fov_y: full Y field of view (radians) + // aspect: viewspace width/height + inline matrix44F matrix44F_make_perspective_fov_lh(float fov_y, float aspect, float nz, float fz) + { + double sin_fov = sin(0.5f * fov_y); + double cos_fov = cos(0.5f * fov_y); + + float y_scale = static_cast(cos_fov / sin_fov); + float x_scale = static_cast(y_scale / aspect); + + matrix44F view_to_proj; + view_to_proj[0].set(x_scale, 0, 0, 0); + view_to_proj[1].set(0, y_scale, 0, 0); + view_to_proj[2].set(0, 0, fz / (fz - nz), 1); + view_to_proj[3].set(0, 0, -nz * fz / (fz - nz), 0); + 
return view_to_proj; + } + + inline matrix44F matrix44F_make_ortho_offcenter_lh(float l, float r, float b, float t, float nz, float fz) + { + matrix44F view_to_proj; + view_to_proj[0].set(2.0f / (r - l), 0.0f, 0.0f, 0.0f); + view_to_proj[1].set(0.0f, 2.0f / (t - b), 0.0f, 0.0f); + view_to_proj[2].set(0.0f, 0.0f, 1.0f / (fz - nz), 0.0f); + view_to_proj[3].set((l + r) / (l - r), (t + b) / (b - t), nz / (nz - fz), 1.0f); + return view_to_proj; + } + + inline matrix44F matrix44F_make_ortho_lh(float w, float h, float nz, float fz) + { + return matrix44F_make_ortho_offcenter_lh(-w * .5f, w * .5f, -h * .5f, h * .5f, nz, fz); + } + + inline matrix44F matrix44F_make_projection_to_screen_d3d(int x, int y, int w, int h, float min_z, float max_z) + { + matrix44F proj_to_screen; + proj_to_screen[0].set(w * .5f, 0.0f, 0.0f, 0.0f); + proj_to_screen[1].set(0, h * -.5f, 0.0f, 0.0f); + proj_to_screen[2].set(0, 0.0f, max_z - min_z, 0.0f); + proj_to_screen[3].set(x + w * .5f, y + h * .5f, min_z, 1.0f); + return proj_to_screen; + } + + inline matrix44F matrix44F_make_lookat_lh(const vec3F& camera_pos, const vec3F& look_at, const vec3F& camera_up, float camera_roll_ang_in_radians) + { + vec4F col2(look_at - camera_pos); + assert(col2.is_vector()); + if (col2.normalize() == 0.0f) + col2.set(0, 0, 1, 0); + + vec4F col1(camera_up); + assert(col1.is_vector()); + if (!col2[0] && !col2[2]) + col1.set(-1.0f, 0.0f, 0.0f, 0.0f); + + if ((col1.dot(col2)) > .9999f) + col1.set(0.0f, 1.0f, 0.0f, 0.0f); + + vec4F col0(vec4F::cross3(col1, col2).normalize_in_place()); + col1 = vec4F::cross3(col2, col0).normalize_in_place(); + + matrix44F rotm(matrix44F::make_identity_matrix()); + rotm.set_col(0, col0); + rotm.set_col(1, col1); + rotm.set_col(2, col2); + return matrix44F::make_translate_matrix(-camera_pos[0], -camera_pos[1], -camera_pos[2]) * rotm * matrix44F::make_rotate_matrix(2, camera_roll_ang_in_radians); + } + + template R matrix_NxN_create_DCT() + { + assert(R::num_rows == R::num_cols); + + const 
uint32_t N = R::num_cols; + + R result; + for (uint32_t k = 0; k < N; k++) + { + for (uint32_t n = 0; n < N; n++) + { + double f; + + if (!k) + f = 1.0f / sqrt(float(N)); + else + f = sqrt(2.0f / float(N)) * cos((basisu::cPiD * (2.0f * float(n) + 1.0f) * float(k)) / (2.0f * float(N))); + + result(k, n) = static_cast(f); + } + } + + return result; + } + + template R matrix_NxN_DCT(const R& a, const R& dct) + { + R temp; + matrix_mul_helper(temp, dct, a); + R result; + matrix_mul_helper_transpose_rhs(result, temp, dct); + return result; + } + + template R matrix_NxN_IDCT(const R& b, const R& dct) + { + R temp; + matrix_mul_helper_transpose_lhs(temp, dct, b); + R result; + matrix_mul_helper(result, temp, dct); + return result; + } + + template matrix matrix_kronecker_product(const X& a, const Y& b) + { + matrix result; + + for (uint32_t r = 0; r < X::num_rows; r++) + { + for (uint32_t c = 0; c < X::num_cols; c++) + { + for (uint32_t i = 0; i < Y::num_rows; i++) + for (uint32_t j = 0; j < Y::num_cols; j++) + result(r * Y::num_rows + i, c * Y::num_cols + j) = a(r, c) * b(i, j); + } + } + + return result; + } + + template matrix matrix_combine_vertically(const X& a, const Y& b) + { + matrix result; + + for (uint32_t r = 0; r < X::num_rows; r++) + for (uint32_t c = 0; c < X::num_cols; c++) + result(r, c) = a(r, c); + + for (uint32_t r = 0; r < Y::num_rows; r++) + for (uint32_t c = 0; c < Y::num_cols; c++) + result(r + X::num_rows, c) = b(r, c); + + return result; + } + + inline matrix88F get_haar8() + { + matrix22F haar2( + 1, 1, + 1, -1); + matrix22F i2( + 1, 0, + 0, 1); + matrix44F i4( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1); + + matrix<1, 2, float> b0; b0(0, 0) = 1; b0(0, 1) = 1; + matrix<1, 2, float> b1; b1(0, 0) = 1.0f; b1(0, 1) = -1.0f; + + matrix<2, 4, float> haar4_0 = matrix_kronecker_product(haar2, b0); + matrix<2, 4, float> haar4_1 = matrix_kronecker_product(i2, b1); + + matrix<4, 4, float> haar4 = matrix_combine_vertically(haar4_0, haar4_1); + + 
matrix<4, 8, float> haar8_0 = matrix_kronecker_product(haar4, b0); + matrix<4, 8, float> haar8_1 = matrix_kronecker_product(i4, b1); + + haar8_0[2] *= sqrtf(2); + haar8_0[3] *= sqrtf(2); + haar8_1 *= 2.0f; + + matrix<8, 8, float> haar8 = matrix_combine_vertically(haar8_0, haar8_1); + + return haar8; + } + + inline matrix44F get_haar4() + { + const float sqrt2 = 1.4142135623730951f; + + return matrix44F( + .5f * 1, .5f * 1, .5f * 1, .5f * 1, + .5f * 1, .5f * 1, .5f * -1, .5f * -1, + .5f * sqrt2, .5f * -sqrt2, 0, 0, + 0, 0, .5f * sqrt2, .5f * -sqrt2); + } + + template + inline matrix<2, 2, T> get_inverse_2x2(const matrix<2, 2, T>& m) + { + double a = m[0][0]; + double b = m[0][1]; + double c = m[1][0]; + double d = m[1][1]; + + double det = a * d - b * c; + if (det != 0.0f) + det = 1.0f / det; + + matrix<2, 2, T> result; + result[0][0] = static_cast(d * det); + result[0][1] = static_cast(-b * det); + result[1][0] = static_cast(-c * det); + result[1][1] = static_cast(a * det); + return result; + } + +} // namespace bu_math + +namespace basisu +{ + class tracked_stat + { + public: + tracked_stat() { clear(); } + + inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + inline void update(int32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + inline tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + inline uint32_t get_number_of_values() { return m_num; } + inline uint64_t get_total() const { return m_total; } + inline uint64_t get_total2() const { return m_total2; } + + inline float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + inline float get_std_dev() const { return m_num ? 
sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + inline float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + int64_t m_total; + int64_t m_total2; + }; + + class tracked_stat_float + { + public: + tracked_stat_float() { clear(); } + + inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + inline void update(float val) { m_num++; m_total += val; m_total2 += val * val; } + + inline tracked_stat_float& operator += (float val) { update(val); return *this; } + + inline uint32_t get_number_of_values() { return m_num; } + inline float get_total() const { return m_total; } + inline float get_total2() const { return m_total2; } + + inline float get_average() const { return m_num ? m_total / (float)m_num : 0.0f; }; + inline float get_std_dev() const { return m_num ? sqrt((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + inline float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + float m_total; + float m_total2; + }; + + class tracked_stat_dbl + { + public: + tracked_stat_dbl() { clear(); } + + inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + inline void update(double val) { m_num++; m_total += val; m_total2 += val * val; } + + inline tracked_stat_dbl& operator += (double val) { update(val); return *this; } + + inline uint64_t get_number_of_values() { return m_num; } + inline double get_total() const { return m_total; } + inline double get_total2() const { return m_total2; } + + inline double get_average() const { return m_num ? m_total / (double)m_num : 0.0f; }; + inline double get_std_dev() const { return m_num ? 
sqrt((double)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + inline double get_variance() const { double s = get_std_dev(); return s * s; } + + private: + uint64_t m_num; + double m_total; + double m_total2; + }; + + template + struct stats + { + uint32_t m_n; + FloatType m_total, m_total_sq; // total, total of squares values + FloatType m_avg, m_avg_sq; // mean, mean of the squared values + FloatType m_rms; // sqrt(m_avg_sq) + FloatType m_std_dev, m_var; // population standard deviation and variance + FloatType m_mad; // mean absolute deviation + FloatType m_min, m_max, m_range; // min and max values, and max-min + FloatType m_len; // length of values as a vector (Euclidean norm or L2 norm) + FloatType m_coeff_of_var; // coefficient of variation (std_dev/mean), High CV: Indicates greater variability relative to the mean, meaning the data values are more spread out, + // Low CV : Indicates less variability relative to the mean, meaning the data values are more consistent. + + FloatType m_skewness; // Skewness = 0: The data is perfectly symmetric around the mean, + // Skewness > 0: The data is positively skewed (right-skewed), + // Skewness < 0: The data is negatively skewed (left-skewed) + // 0-.5 approx. 
symmetry, .5-1 moderate skew, >= 1 highly skewed + + FloatType m_kurtosis; // Excess Kurtosis: Kurtosis = 0: The distribution has normal kurtosis (mesokurtic) + // Kurtosis > 0: The distribution is leptokurtic, with heavy tails and a sharp peak + // Kurtosis < 0: The distribution is platykurtic, with light tails and a flatter peak + + bool m_any_zero; + + FloatType m_median; + uint32_t m_median_index; + + FloatType m_five_percent_lo; // avg of the lowest 5%, must calc median to be valid + FloatType m_five_percent_hi; // avg of the lowest 5%, must calc median to be valid + + stats() + { + clear(); + } + + void clear() + { + m_n = 0; + m_total = 0, m_total_sq = 0; + m_avg = 0, m_avg_sq = 0; + m_rms = 0; + m_std_dev = 0, m_var = 0; + m_mad = 0; + m_min = BIG_FLOAT_VAL, m_max = -BIG_FLOAT_VAL; m_range = 0.0f; + m_len = 0; + m_coeff_of_var = 0; + m_skewness = 0; + m_kurtosis = 0; + m_any_zero = false; + + m_median = 0; + m_median_index = 0; + + m_five_percent_lo = 0; + m_five_percent_hi = 0; + } + + template + void calc_median(uint32_t n, const T* pVals, uint32_t stride = 1) + { + m_median = 0; + m_median_index = 0; + + if (!n) + return; + + basisu::vector< std::pair > vals(n); + + for (uint32_t i = 0; i < n; i++) + { + vals[i].first = pVals[i * stride]; + vals[i].second = i; + } + + std::sort(vals.begin(), vals.end(), [](const std::pair& a, const std::pair& b) { + return a.first < b.first; + }); + + m_median = vals[n / 2].first; + if ((n & 1) == 0) + m_median = (m_median + vals[(n / 2) - 1].first) * .5f; + + m_median_index = vals[n / 2].second; + + // sum and avg low 5% and high 5% + const uint32_t p5_n = clamp((n + 10) / 20, 1u, n); + FloatType lo5_sum = 0, hi5_sum = 0; + + for (uint32_t i = 0; i < p5_n; i++) + { + lo5_sum += vals[i].first; + hi5_sum += vals[n - 1 - i].first; + } + + m_five_percent_lo = lo5_sum / FloatType(p5_n); + m_five_percent_hi = hi5_sum / FloatType(p5_n); + } + + template + void calc(uint32_t n, const T* pVals, uint32_t stride = 1, bool 
calc_median_flag = false) + { + clear(); + + if (!n) + return; + + if (calc_median_flag) + calc_median(n, pVals, stride); + + m_n = n; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + + if (v == 0.0f) + m_any_zero = true; + + m_total += v; + m_total_sq += v * v; + + if (!i) + { + m_min = v; + m_max = v; + } + else + { + m_min = minimum(m_min, v); + m_max = maximum(m_max, v); + } + } + + m_range = m_max - m_min; + + m_len = sqrt(m_total_sq); + + const FloatType nd = (FloatType)n; + + m_avg = m_total / nd; + m_avg_sq = m_total_sq / nd; + m_rms = sqrt(m_avg_sq); + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + FloatType d = v - m_avg; + + const FloatType d2 = d * d; + const FloatType d3 = d2 * d; + const FloatType d4 = d3 * d; + + m_var += d2; + m_mad += fabs(d); + m_skewness += d3; + m_kurtosis += d4; + } + + m_var /= nd; + m_mad /= nd; + + m_std_dev = sqrt(m_var); + + m_coeff_of_var = (m_avg != 0.0f) ? (m_std_dev / fabs(m_avg)) : 0.0f; + + FloatType k3 = m_std_dev * m_std_dev * m_std_dev; + FloatType k4 = k3 * m_std_dev; + m_skewness = (k3 != 0.0f) ? ((m_skewness / nd) / k3) : 0.0f; + m_kurtosis = (k4 != 0.0f) ? (((m_kurtosis / nd) / k4) - 3.0f) : 0.0f; + } + + // Only compute average, variance and standard deviation. + template + void calc_simplified(uint32_t n, const T* pVals, uint32_t stride = 1) + { + clear(); + + if (!n) + return; + + m_n = n; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + + m_total += v; + } + + const FloatType nd = (FloatType)n; + + m_avg = m_total / nd; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + FloatType d = v - m_avg; + + const FloatType d2 = d * d; + + m_var += d2; + } + + m_var /= nd; + m_std_dev = sqrt(m_var); + } + + // Only compute average, variance and standard deviation. 
+ template + void calc_simplified_with_range(uint32_t n, const T* pVals, uint32_t stride = 1) + { + clear(); + + if (!n) + return; + + m_n = n; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + + m_total += v; + + if (!i) + { + m_min = v; + m_max = v; + } + else + { + m_min = minimum(m_min, v); + m_max = maximum(m_max, v); + } + } + + m_range = m_max - m_min; + + const FloatType nd = (FloatType)n; + + m_avg = m_total / nd; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + FloatType d = v - m_avg; + + const FloatType d2 = d * d; + + m_var += d2; + } + + m_var /= nd; + m_std_dev = sqrt(m_var); + } + }; + + template + struct comparative_stats + { + FloatType m_cov; // covariance + FloatType m_pearson; // Pearson Correlation Coefficient (r) [-1,1] + FloatType m_mse; // mean squared error + FloatType m_rmse; // root mean squared error + FloatType m_mae; // mean abs error + FloatType m_rmsle; // root mean squared log error + FloatType m_euclidean_dist; // euclidean distance between values as vectors + FloatType m_cosine_sim; // normalized dot products of values as vectors + FloatType m_min_diff, m_max_diff; // minimum/maximum abs difference between values + + comparative_stats() + { + clear(); + } + + void clear() + { + m_cov = 0; + m_pearson = 0; + m_mse = 0; + m_rmse = 0; + m_mae = 0; + m_rmsle = 0; + m_euclidean_dist = 0; + m_cosine_sim = 0; + m_min_diff = 0; + m_max_diff = 0; + } + + template + void calc(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats *pA_stats = nullptr, const stats *pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i 
* a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + if ((pA_stats->m_min >= 0.0f) && (pB_stats->m_min >= 0.0f)) + { + const FloatType ld = log(fa + 1.0f) - log(fb + 1.0f); + m_rmsle += ld * ld; + } + + const FloatType diff = fa - fb; + const FloatType abs_diff = fabs(diff); + + m_mse += diff * diff; + m_mae += abs_diff; + + m_min_diff = i ? minimum(m_min_diff, abs_diff) : abs_diff; + m_max_diff = maximum(m_max_diff, abs_diff); + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + + m_cosine_sim += fa * fb; + } + + const FloatType nd = (FloatType)n; + + m_euclidean_dist = sqrt(m_mse); + + m_mse /= nd; + m_rmse = sqrt(m_mse); + + m_mae /= nd; + + m_cov /= nd; + + FloatType dv = (pA_stats->m_std_dev * pB_stats->m_std_dev); + if (dv != 0.0f) + m_pearson = m_cov / dv; + + if ((pA_stats->m_min >= 0.0) && (pB_stats->m_min >= 0.0f)) + m_rmsle = sqrt(m_rmsle / nd); + + FloatType c = pA_stats->m_len * pB_stats->m_len; + if (c != 0.0f) + m_cosine_sim /= c; + else + m_cosine_sim = 0.0f; + } + + // Only computes Pearson, cov, mse, rmse, Euclidean distance + template + void calc_pearson(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i * a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + const FloatType diff = fa - fb; + + m_mse += diff * diff; + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + } + + const FloatType nd = (FloatType)n; + + m_euclidean_dist = sqrt(m_mse); + + m_mse /= nd; + 
m_rmse = sqrt(m_mse); + + m_cov /= nd; + + FloatType dv = (pA_stats->m_std_dev * pB_stats->m_std_dev); + if (dv != 0.0f) + m_pearson = m_cov / dv; + } + + // Only computes MSE, RMSE, eclidiean distance, and covariance. + template + void calc_simplified(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i * a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + const FloatType diff = fa - fb; + + m_mse += diff * diff; + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + } + + const FloatType nd = (FloatType)n; + + m_euclidean_dist = sqrt(m_mse); + + m_mse /= nd; + m_rmse = sqrt(m_mse); + + m_cov /= nd; + } + + // Only computes covariance. 
+ template + void calc_cov(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i * a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + } + + const FloatType nd = (FloatType)n; + + m_cov /= nd; + } + }; + + class stat_history + { + public: + stat_history(uint32_t size) + { + init(size); + } + + void init(uint32_t size) + { + clear(); + + m_samples.reserve(size); + m_samples.resize(0); + m_max_samples = size; + } + + inline void clear() + { + m_samples.resize(0); + m_max_samples = 0; + } + + inline void update(double val) + { + m_samples.push_back(val); + + if (m_samples.size() > m_max_samples) + m_samples.erase_index(0); + } + + inline size_t size() + { + return m_samples.size(); + } + + struct stats + { + double m_avg = 0; + double m_std_dev = 0; + double m_var = 0; + double m_mad = 0; + double m_min_val = 0; + double m_max_val = 0; + + void clear() + { + basisu::clear_obj(*this); + } + }; + + inline void get_stats(stats& s) + { + s.clear(); + + if (m_samples.empty()) + return; + + double total = 0, total2 = 0; + + for (size_t i = 0; i < m_samples.size(); i++) + { + const double v = m_samples[i]; + + total += v; + total2 += v * v; + + if (!i) + { + s.m_min_val = v; + s.m_max_val = v; + } + else + { + s.m_min_val = basisu::minimum(s.m_min_val, v); + s.m_max_val = basisu::maximum(s.m_max_val, v); + } + } + + const double n = (double)m_samples.size(); + + s.m_avg = total / n; + s.m_std_dev = sqrt((n * total2 - 
total * total)) / n; + s.m_var = (n * total2 - total * total) / (n * n); + + double sc = 0; + for (size_t i = 0; i < m_samples.size(); i++) + { + const double v = m_samples[i]; + s.m_mad += fabs(v - s.m_avg); + + sc += basisu::square(v - s.m_avg); + } + sc = sqrt(sc / n); + + s.m_mad /= n; + } + + private: + uint32_t m_max_samples; + basisu::vector m_samples; + }; + + // bfloat16 helpers, see: + // https://en.wikipedia.org/wiki/Bfloat16_floating-point_format + + typedef union + { + uint32_t u; + float f; + } float32_union; + + typedef uint16_t bfloat16; + + inline float bfloat16_to_float(bfloat16 bfloat16) + { + float32_union float_union; + float_union.u = ((uint32_t)bfloat16) << 16; + return float_union.f; + } + + inline bfloat16 float_to_bfloat16(float input, bool round_flag = true) + { + float32_union float_union; + float_union.f = input; + + uint32_t exponent = (float_union.u >> 23) & 0xFF; + + // Check if the number is denormalized in float32 (exponent == 0) + if (exponent == 0) + { + // Handle denormalized float32 as zero in bfloat16 + return 0x0000; + } + + // Extract the top 16 bits (sign, exponent, and 7 most significant bits of the mantissa) + uint32_t upperBits = float_union.u >> 16; + + if (round_flag) + { + // Check the most significant bit of the lower 16 bits for rounding + uint32_t lowerBits = float_union.u & 0xFFFF; + + // Round to nearest or even + if ((lowerBits & 0x8000) && + ((lowerBits > 0x8000) || ((lowerBits == 0x8000) && (upperBits & 1))) + ) + { + // Round up + upperBits += 1; + + // Check for overflow in the exponent after rounding up + if (((upperBits & 0x7F80) == 0x7F80) && ((upperBits & 0x007F) == 0)) + { + // Exponent overflow (the upper bits became all 1s) + // Set the result to infinity + upperBits = (upperBits & 0x8000) | 0x7F80; // Preserve the sign bit, set exponent to 0xFF, and mantissa to 0 + } + } + } + + return (bfloat16)upperBits; + } + + inline int bfloat16_get_exp(bfloat16 v) + { + return (int)((v >> 7) & 0xFF) - 127; + } 
+ + inline int bfloat16_get_mantissa(bfloat16 v) + { + return (v & 0x7F); + } + + inline int bfloat16_get_sign(bfloat16 v) + { + return (v & 0x8000) ? -1 : 1; + } + + inline bool bfloat16_is_nan_or_inf(bfloat16 v) + { + return ((v >> 7) & 0xFF) == 0xFF; + } + + inline bool bfloat16_is_zero(bfloat16 v) + { + return (v & 0x7FFF) == 0; + } + + inline bfloat16 bfloat16_init(int sign, int exp, int mant) + { + uint16_t res = (sign < 0) ? 0x8000 : 0; + + assert((exp >= -126) && (res <= 127)); + res |= ((exp + 127) << 7); + + assert((mant >= 0) && (mant < 128)); + res |= mant; + + return res; + } + + +} // namespace basisu + diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_miniz.h b/Source/ThirdParty/basis_universal/encoder/basisu_miniz.h new file mode 100644 index 000000000..43d757fe7 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_miniz.h @@ -0,0 +1,2530 @@ +/* miniz.c v1.15 - deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ + + Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be called. +// The current downside is the times written to your archives will be from 1979. +//#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. 
+//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) + // TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux + #define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) + #include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +// Using unaligned loads and stores causes errors when using UBSan. Jam it off. +#if defined(__has_feature) +#if __has_feature(undefined_behavior_sanitizer) +#undef MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 +#endif +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +namespace buminiz { + +// ------------------- zlib-style API Definitions. + +// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! 
+typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). +enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +// Return status codes. MZ_PARAM_ERROR is non-standard. 
// Return status codes. MZ_PARAM_ERROR is non-standard (zlib has no equivalent).
enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 };

// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };

// Window bits
#define MZ_DEFAULT_WINDOW_BITS 15

struct mz_internal_state;

// Compression/decompression stream struct.
typedef struct mz_stream_s
{
    const unsigned char *next_in;    // pointer to next byte to read
    unsigned int avail_in;           // number of bytes available at next_in
    mz_ulong total_in;               // total number of bytes consumed so far

    unsigned char *next_out;         // pointer to next byte to write
    unsigned int avail_out;          // number of bytes that can be written to next_out
    mz_ulong total_out;              // total number of bytes produced so far

    char *msg;                       // error msg (unused)
    struct mz_internal_state *state; // internal state, allocated by zalloc/zfree

    mz_alloc_func zalloc;            // optional heap allocation function (defaults to malloc)
    mz_free_func zfree;              // optional heap free function (defaults to free)
    void *opaque;                    // heap alloc function user pointer

    int data_type;                   // data_type (unused)
    mz_ulong adler;                  // adler32 of the source or uncompressed data
    mz_ulong reserved;               // not used
} mz_stream;

typedef mz_stream *mz_streamp;

// Returns the version string of miniz.c.
const char *mz_version(void);

// mz_deflateInit() initializes a compressor with default options:
// Parameters:
//  pStream must point to an initialized mz_stream struct.
//  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
//  level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
//  (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
// Return values:
//  MZ_OK on success.
//  MZ_STREAM_ERROR if the stream is bogus.
//  MZ_PARAM_ERROR if the input parameters are bogus.
//  MZ_MEM_ERROR on out of memory.
int mz_deflateInit(mz_streamp pStream, int level);

// mz_deflateInit2() is like mz_deflateInit(), except with more control:
// Additional parameters:
//   method must be MZ_DEFLATED
//   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
//   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);

// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
int mz_deflateReset(mz_streamp pStream);

// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
// Parameters:
//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
//   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
// Return values:
//   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
//   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
//   MZ_STREAM_ERROR if the stream is bogus.
//   MZ_PARAM_ERROR if one of the parameters is invalid.
//   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
int mz_deflate(mz_streamp pStream, int flush);

// mz_deflateEnd() deinitializes a compressor:
// Return values:
//  MZ_OK on success.
//  MZ_STREAM_ERROR if the stream is bogus.
int mz_deflateEnd(mz_streamp pStream);

// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);

// Single-call compression functions mz_compress() and mz_compress2():
// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);

// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
mz_ulong mz_compressBound(mz_ulong source_len);

// Initializes a decompressor.
int mz_inflateInit(mz_streamp pStream);

// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
int mz_inflateInit2(mz_streamp pStream, int window_bits);

// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
// Parameters:
//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
//   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
//   On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
//   MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
// Return values:
//   MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
//   MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
//   MZ_STREAM_ERROR if the stream is bogus.
//   MZ_DATA_ERROR if the deflate stream is invalid.
//   MZ_PARAM_ERROR if one of the parameters is invalid.
//   MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
//   with more input data, or with more room in the output buffer (except when using single call decompression, described above).
int mz_inflate(mz_streamp pStream, int flush);
int mz_inflate2(mz_streamp pStream, int flush, int adler32_checking);

// Deinitializes a decompressor.
int mz_inflateEnd(mz_streamp pStream);

// Single-call decompression.
// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);

// Returns a string description of the specified error code, or NULL if the error code is invalid.
const char *mz_error(int err);

// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
// zlib -> miniz name aliases: compiled out entirely when MINIZ_NO_ZLIB_COMPATIBLE_NAMES is defined.
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
  typedef unsigned char Byte;
  typedef unsigned int uInt;
  typedef mz_ulong uLong;
  typedef Byte Bytef;
  typedef uInt uIntf;
  typedef char charf;
  typedef int intf;
  typedef void *voidpf;
  typedef uLong uLongf;
  typedef void *voidp;
  typedef void *const voidpc;
  #define Z_NULL 0
  #define Z_NO_FLUSH MZ_NO_FLUSH
  #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
  #define Z_SYNC_FLUSH MZ_SYNC_FLUSH
  #define Z_FULL_FLUSH MZ_FULL_FLUSH
  #define Z_FINISH MZ_FINISH
  #define Z_BLOCK MZ_BLOCK
  #define Z_OK MZ_OK
  #define Z_STREAM_END MZ_STREAM_END
  #define Z_NEED_DICT MZ_NEED_DICT
  #define Z_ERRNO MZ_ERRNO
  #define Z_STREAM_ERROR MZ_STREAM_ERROR
  #define Z_DATA_ERROR MZ_DATA_ERROR
  #define Z_MEM_ERROR MZ_MEM_ERROR
  #define Z_BUF_ERROR MZ_BUF_ERROR
  #define Z_VERSION_ERROR MZ_VERSION_ERROR
  #define Z_PARAM_ERROR MZ_PARAM_ERROR
  #define Z_NO_COMPRESSION MZ_NO_COMPRESSION
  #define Z_BEST_SPEED MZ_BEST_SPEED
  #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
  #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
  #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
  #define Z_FILTERED MZ_FILTERED
  #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
  #define Z_RLE MZ_RLE
  #define Z_FIXED MZ_FIXED
  #define Z_DEFLATED MZ_DEFLATED
  #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
  #define alloc_func mz_alloc_func
  #define free_func mz_free_func
  #define internal_state mz_internal_state
  #define z_stream mz_stream
  #define deflateInit mz_deflateInit
  #define deflateInit2 mz_deflateInit2
  #define deflateReset mz_deflateReset
  #define deflate mz_deflate
  #define deflateEnd mz_deflateEnd
  #define deflateBound mz_deflateBound
  #define compress mz_compress
  #define compress2 mz_compress2
  #define compressBound mz_compressBound
  #define inflateInit mz_inflateInit
  #define inflateInit2 mz_inflateInit2
  #define inflate mz_inflate
  #define inflateEnd mz_inflateEnd
  #define uncompress mz_uncompress
  #define crc32 mz_crc32
  #define adler32 mz_adler32
  #define MAX_WBITS 15
  #define MAX_MEM_LEVEL 9
  #define zError mz_error
  #define ZLIB_VERSION MZ_VERSION
  #define ZLIB_VERNUM MZ_VERNUM
  #define ZLIB_VER_MAJOR MZ_VER_MAJOR
  #define ZLIB_VER_MINOR MZ_VER_MINOR
  #define ZLIB_VER_REVISION MZ_VER_REVISION
  #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
  #define zlibVersion mz_version
  #define zlib_version mz_version()
#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES

#endif // MINIZ_NO_ZLIB_APIS

// ------------------- Types and macros

typedef unsigned char mz_uint8;
typedef signed short mz_int16;
typedef unsigned short mz_uint16;
typedef unsigned int mz_uint32;
typedef unsigned int mz_uint;
typedef long long mz_int64;
typedef unsigned long long mz_uint64;
typedef int mz_bool;

#define MZ_FALSE (0)
#define MZ_TRUE (1)

// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message.
#ifdef _MSC_VER
  #define MZ_MACRO_END while (0, 0)
#else
  #define MZ_MACRO_END while (0)
#endif

// ------------------- Low-level Decompression API Definitions

// Decompression flags used by tinfl_decompress().
// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
enum
{
  TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
  TINFL_FLAG_HAS_MORE_INPUT = 2,
  TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
  TINFL_FLAG_COMPUTE_ADLER32 = 8
};

// High level decompression functions:
// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
// On entry:
//  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
// On return:
//  Function returns a pointer to the decompressed data, or NULL on failure.
//  *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
//  The caller must call mz_free() on the returned block when it's no longer needed.
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);

// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);

// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
// Returns 1 on success or 0 on failure.
typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;

// Max size of LZ dictionary.
#define TINFL_LZ_DICT_SIZE 32768

// Return status. Negative values are failures; TINFL_STATUS_DONE (0) and above indicate the coroutine may continue or has finished.
typedef enum
{
  TINFL_STATUS_BAD_PARAM = -3,
  TINFL_STATUS_ADLER32_MISMATCH = -2,
  TINFL_STATUS_FAILED = -1,
  TINFL_STATUS_DONE = 0,
  TINFL_STATUS_NEEDS_MORE_INPUT = 1,
  TINFL_STATUS_HAS_MORE_OUTPUT = 2
} tinfl_status;

// Initializes the decompressor to its initial state.
#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
#define tinfl_get_adler32(r) (r)->m_check_adler32

// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);

// Internal/private bits follow.
+enum +{ + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS + #define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). +enum +{ + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. 
// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers).
// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing.
// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
enum
{
  TDEFL_WRITE_ZLIB_HEADER = 0x01000,
  TDEFL_COMPUTE_ADLER32 = 0x02000,
  TDEFL_GREEDY_PARSING_FLAG = 0x04000,
  TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
  TDEFL_RLE_MATCHES = 0x10000,
  TDEFL_FILTER_MATCHES = 0x20000,
  TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
  TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
};

// High level compression functions:
// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
// On entry:
//  pSrc_buf, src_buf_len: Pointer and size of source block to compress.
//  flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
// On return:
//  Function returns a pointer to the compressed data, or NULL on failure.
//  *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
//  The caller must free() the returned block when it's no longer needed.
void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);

// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
// Returns 0 on failure.
size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);

// Compresses an image to a compressed PNG file in memory.
// On entry:
//  pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4.
//  The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
//  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
//  If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
// On return:
//  Function returns a pointer to the compressed data, or NULL on failure.
//  *pLen_out will be set to the size of the PNG image file.
//  The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);

// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);

// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 };

// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
#if TDEFL_LESS_MEMORY
enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
#else
enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
#endif

// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
typedef enum
{
  TDEFL_STATUS_BAD_PARAM = -2,
  TDEFL_STATUS_PUT_BUF_FAILED = -1,
  TDEFL_STATUS_OKAY = 0,
  TDEFL_STATUS_DONE = 1,
} tdefl_status;

// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
typedef enum
{
  TDEFL_NO_FLUSH = 0,
  TDEFL_SYNC_FLUSH = 2,
  TDEFL_FULL_FLUSH = 3,
  TDEFL_FINISH = 4
} tdefl_flush;

// tdefl's compression state structure.
+typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) 
tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);

// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
// tdefl_compress_buffer() always consumes the entire input buffer.
tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);

tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
mz_uint32 tdefl_get_adler32(tdefl_compressor *d);

// Not available when MINIZ_NO_ZLIB_APIS is defined, because it uses some of the zlib-style macros (MZ_DEFAULT_STRATEGY, etc.).
#ifndef MINIZ_NO_ZLIB_APIS
// Create tdefl_compress() flags given zlib-style compression parameters.
// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
// window_bits may be -15 (raw deflate) or 15 (zlib)
// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
#endif // #ifndef MINIZ_NO_ZLIB_APIS

} // namespace buminiz

#endif // MINIZ_HEADER_INCLUDED

// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.)

#ifndef MINIZ_HEADER_FILE_ONLY

// NOTE(review): the two include targets were lost to text mangling; restored to the headers this
// translation unit actually requires: memset/memcpy need <string.h>, MZ_ASSERT below needs <assert.h>.
#include <string.h>
#include <assert.h>

namespace buminiz {

// Compile-time checks that the fixed-width typedefs above have the expected sizes (negative array size on failure).
typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1];
typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1];
typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 1 : -1];

#define MZ_ASSERT(x) assert(x)

#ifdef MINIZ_NO_MALLOC
  #define MZ_MALLOC(x) NULL
  #define MZ_FREE(x) (void)x, ((void)0)
  #define MZ_REALLOC(p, x) NULL
#else
  #define MZ_MALLOC(x) malloc(x)
  #define MZ_FREE(x) free(x)
  #define MZ_REALLOC(p, x) realloc(p, x)
#endif

#define MZ_MAX(a,b) (((a)>(b))?(a):(b))
#define MZ_MIN(a,b) (((a)<(b))?(a):(b))
#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
  #define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
  #define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
#else
  #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
  #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
#endif

#ifdef _MSC_VER
  #define MZ_FORCEINLINE __forceinline
#elif defined(__GNUC__)
  #define MZ_FORCEINLINE inline __attribute__((__always_inline__))
#else
  #define MZ_FORCEINLINE inline
#endif

// ------------------- zlib-style API's

// Adler-32 checksum, processed in 5552-byte blocks so the 16-bit sums can't overflow 32 bits before the mod-65521 reduction.
// Passing ptr==NULL returns the initial checksum value (zlib convention).
mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len)
{
  mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552;
  if (!ptr) return MZ_ADLER32_INIT;
  while (buf_len) {
    for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
      s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1;
      s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1;
    }
    for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1;
    s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552;
  }
  return (s2 << 16) + s1;
}

// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/
// Uses a 16-entry nibble table instead of the usual 256-entry byte table. Passing ptr==NULL returns the initial CRC value.
mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len)
{
  static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
    0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
  mz_uint32 crcu32 = (mz_uint32)crc;
  if (!ptr) return MZ_CRC32_INIT;
  crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; }
  return ~crcu32;
}

// Frees a block allocated by the *_mem_to_heap() helpers via MZ_FREE.
void mz_free(void *p)
{
  MZ_FREE(p);
}

#ifndef MINIZ_NO_ZLIB_APIS

// Default zalloc/zfree used when the caller leaves mz_stream.zalloc/zfree NULL.
static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); }
static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); }
//static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); }

const char *mz_version(void)
{
  return MZ_VERSION;
}

int mz_deflateInit(mz_streamp pStream, int level)
{
  // mem_level 9 is checked but otherwise ignored (see mz_deflateInit2's comment in the header).
  return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY);
}

int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy)
{
  tdefl_compressor *pComp;
  mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy);

  if (!pStream) return MZ_STREAM_ERROR;
  // Only MZ_DEFLATED, mem_level [1,9], and +/-MZ_DEFAULT_WINDOW_BITS (zlib-wrapped vs raw deflate) are accepted.
  if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR;

  pStream->data_type = 0;
  pStream->adler = MZ_ADLER32_INIT;
  pStream->msg = NULL;
  pStream->reserved = 0;
  pStream->total_in = 0;
  pStream->total_out = 0;
  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
  if (!pStream->zfree) pStream->zfree = def_free_func;

  pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor));
  if (!pComp)
    return MZ_MEM_ERROR;

  pStream->state = (struct mz_internal_state *)pComp;

  if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY)
  {
    mz_deflateEnd(pStream);
    return MZ_PARAM_ERROR;
  }

  return MZ_OK;
}

int mz_deflateReset(mz_streamp pStream)
{
  if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR;
  pStream->total_in = pStream->total_out = 0;
  // Re-init the existing compressor in place with its current flags; no reallocation needed.
  tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags);
  return MZ_OK;
}

int mz_deflate(mz_streamp pStream, int flush)
{
  size_t in_bytes, out_bytes;
  mz_ulong orig_total_in, orig_total_out;
  int mz_status = MZ_OK;

  if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR;
  if (!pStream->avail_out) return MZ_BUF_ERROR;

  // tdefl has no partial-flush concept; treat it as a sync flush.
  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;

  if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE)
    return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR;

  orig_total_in = pStream->total_in; orig_total_out = pStream->total_out;
  for ( ; ; )
  {
    tdefl_status defl_status;
    in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;

    defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush);
    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
    pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state);

    pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes;
    pStream->total_out += (mz_uint)out_bytes;

    if (defl_status < 0)
    {
      mz_status = MZ_STREAM_ERROR;
      break;
    }
    else if (defl_status == TDEFL_STATUS_DONE)
    {
      mz_status = MZ_STREAM_END;
      break;
    }
    else if (!pStream->avail_out)
      break;
    else if ((!pStream->avail_in) && (flush != MZ_FINISH))
    {
      if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out))
        break;
      return MZ_BUF_ERROR; // Can't make forward progress without some input.
    }
  }
  return mz_status;
}

int mz_deflateEnd(mz_streamp pStream)
{
  if (!pStream) return MZ_STREAM_ERROR;
  if (pStream->state)
  {
    pStream->zfree(pStream->opaque, pStream->state);
    pStream->state = NULL;
  }
  return MZ_OK;
}

mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len)
{
  (void)pStream;
  // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.)
  // Takes the max of a +10% estimate and a per-31KB-block raw-store overhead estimate, saturating to the largest mz_ulong on overflow.
  mz_uint64 a = 128ULL + (source_len * 110ULL) / 100ULL;
  mz_uint64 b = 128ULL + (mz_uint64)source_len + ((source_len / (31 * 1024)) + 1ULL) * 5ULL;

  mz_uint64 t = MZ_MAX(a, b);
  if (((mz_ulong)t) != t)
    t = (mz_ulong)(-1);

  return (mz_ulong)t;
}

int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level)
{
  int status;
  mz_stream stream;
  memset(&stream, 0, sizeof(stream));

  // In case mz_ulong is 64-bits (argh I hate longs).
  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;

  stream.next_in = pSource;
  stream.avail_in = (mz_uint32)source_len;
  stream.next_out = pDest;
  stream.avail_out = (mz_uint32)*pDest_len;

  status = mz_deflateInit(&stream, level);
  if (status != MZ_OK) return status;

  status = mz_deflate(&stream, MZ_FINISH);
  if (status != MZ_STREAM_END)
  {
    mz_deflateEnd(&stream);
    return (status == MZ_OK) ? MZ_BUF_ERROR : status;
  }

  *pDest_len = stream.total_out;
  return mz_deflateEnd(&stream);
}

int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len)
{
  return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION);
}

mz_ulong mz_compressBound(mz_ulong source_len)
{
  return mz_deflateBound(NULL, source_len);
}

// Internal state for the zlib-style inflate wrapper: the tinfl coroutine plus a 32KB sliding
// dictionary that buffers decompressed output between mz_inflate2() calls.
typedef struct
{
  tinfl_decompressor m_decomp;
  mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits;
  mz_uint8 m_dict[TINFL_LZ_DICT_SIZE];
  tinfl_status m_last_status;
} inflate_state;

int mz_inflateInit2(mz_streamp pStream, int window_bits)
{
  inflate_state *pDecomp;
  if (!pStream) return MZ_STREAM_ERROR;
  if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR;

  pStream->data_type = 0;
  pStream->adler = 0;
  pStream->msg = NULL;
  pStream->total_in = 0;
  pStream->total_out = 0;
  pStream->reserved = 0;
  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
  if (!pStream->zfree) pStream->zfree = def_free_func;

  pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state));
  if (!pDecomp) return MZ_MEM_ERROR;

  pStream->state = (struct mz_internal_state *)pDecomp;

  tinfl_init(&pDecomp->m_decomp);
  pDecomp->m_dict_ofs = 0;
  pDecomp->m_dict_avail = 0;
  pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;
  pDecomp->m_first_call = 1;
  pDecomp->m_has_flushed = 0;
  pDecomp->m_window_bits = window_bits;

  return MZ_OK;
}

int mz_inflateInit(mz_streamp pStream)
{
  return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS);
}

int mz_inflate2(mz_streamp pStream, int flush, int adler32_checking)
{
  inflate_state* pState;
  mz_uint n, first_call, decomp_flags = adler32_checking ? TINFL_FLAG_COMPUTE_ADLER32 : 0;
  size_t in_bytes, out_bytes, orig_avail_in;
  tinfl_status status;

  if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR;
  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
  if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;

  pState = (inflate_state*)pStream->state;
  // Positive window_bits means the stream is zlib-wrapped (see mz_inflateInit2).
  if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
  orig_avail_in = pStream->avail_in;

  first_call = pState->m_first_call; pState->m_first_call = 0;
  if (pState->m_last_status < 0) return MZ_DATA_ERROR;

  if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;
  pState->m_has_flushed |= (flush == MZ_FINISH);

  if ((flush == MZ_FINISH) && (first_call))
  {
    // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file.
    // Single-shot path: decompress straight into the caller's buffer, bypassing the dictionary.
    decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
    in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;
    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags);
    pState->m_last_status = status;
    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes;
    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
    pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes;

    if (status < 0)
      return MZ_DATA_ERROR;
    else if (status != TINFL_STATUS_DONE)
    {
      pState->m_last_status = TINFL_STATUS_FAILED;
      return MZ_BUF_ERROR;
    }
    return MZ_STREAM_END;
  }
  // flush != MZ_FINISH then we must assume there's more input.
  if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;

  // Drain any decompressed bytes left over in the dictionary from the previous call before decompressing more.
  if (pState->m_dict_avail)
  {
    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
    pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
    pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
    return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
  }

  for ( ; ; )
  {
    in_bytes = pStream->avail_in;
    out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;

    // Decompress into the circular dictionary, then copy as much as fits to the caller's output buffer.
    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags);
    pState->m_last_status = status;

    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
    pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp);

    pState->m_dict_avail = (mz_uint)out_bytes;

    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
    pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
    pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);

    if (status < 0)
      return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well).
    else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
      return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH.
    else if (flush == MZ_FINISH)
    {
      // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH.
      if (status == TINFL_STATUS_DONE)
        return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
      // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong.
      else if (!pStream->avail_out)
        return MZ_BUF_ERROR;
    }
    else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail))
      break;
  }

  return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ?
MZ_STREAM_END : MZ_OK; +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + return mz_inflate2(pStream, flush, MZ_TRUE); +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? 
MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif //MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. 
// Fetches one input byte into c. If the input is exhausted and the caller promised more
// (TINFL_FLAG_HAS_MORE_INPUT), the coroutine suspends with NEEDS_MORE_INPUT and retries
// after resuming; otherwise the stream is padded with 0 (see TODO above).
#define TINFL_GET_BYTE(state_index, c) do { \
  if (pIn_buf_cur >= pIn_buf_end) { \
    for ( ; ; ) { \
      if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \
        TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
        if (pIn_buf_cur < pIn_buf_end) { \
          c = *pIn_buf_cur++; \
          break; \
        } \
      } else { \
        c = 0; \
        break; \
      } \
    } \
  } else c = *pIn_buf_cur++; } MZ_MACRO_END

// Tops up bit_buf one byte at a time until it holds at least n bits.
#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n))
// Discards n bits from the bit buffer (refilling first if needed).
#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END
// Extracts the low n bits of the bit buffer into b (refilling first if needed).
#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END

// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2.
// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a
// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the
// bit buffer contains >=15 bits (deflate's max. Huffman code size).
#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \
  do { \
    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
    if (temp >= 0) { \
      code_len = temp >> 9; \
      if ((code_len) && (num_bits >= code_len)) \
        break; \
    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \
      code_len = TINFL_FAST_LOOKUP_BITS; \
      do { \
        temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
      } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \
    } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \
  } while (num_bits < 15);

// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read
// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully
// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32.
// The slow path is only executed at the very end of the input buffer.
// Decodes the next Huffman symbol into sym. Fast path: batch-load 2 input bytes and use the
// direct lookup table; slow path (fewer than 2 input bytes left): TINFL_HUFF_BITBUF_FILL.
#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \
  int temp; mz_uint code_len, c; \
  if (num_bits < 15) { \
    if ((pIn_buf_end - pIn_buf_cur) < 2) { \
      TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \
    } else { \
      bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \
    } \
  } \
  if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \
    code_len = temp >> 9, temp &= 511; \
  else { \
    code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \
  } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END

// Core streaming inflate coroutine. Consumes up to *pIn_buf_size bytes from pIn_buf_next and
// writes up to *pOut_buf_size bytes at pOut_buf_next; both sizes are updated to the amounts
// actually used. pOut_buf_start is the beginning of the (power-of-2 sized, unless
// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) output window. Returns a tinfl_status; negative
// values are failures, and the coroutine can be resumed after NEEDS_MORE_INPUT/HAS_MORE_OUTPUT.
tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags)
{
  // Deflate fixed tables: match-length bases/extra-bit counts, distance bases/extra-bit
  // counts, the code-length-code reordering, and the minimum sizes of the three tables.
  static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 };
  static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
  static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
  static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
  static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
  static const int s_min_table_sizes[3] = { 257, 1, 4 };

  tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf;
  const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size;
  mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size;
  size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start;

  // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter).
  if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; }

  // Restore coroutine locals saved at the last suspension point.
  num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start;
  TINFL_CR_BEGIN

  bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1;
  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
  {
    // Validate the 2-byte zlib header: checksum-of-31, no preset dictionary, method 8 (deflate),
    // and (for wrapping buffers) a window size that fits in the provided output buffer.
    TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1);
    counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
    if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4)))));
    if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); }
  }

  do
  {
    // Per deflate block: 1 final-block bit, then 2 type bits.
    TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1;
    if (r->m_type == 0)
    {
      // Stored (uncompressed) block: byte-align, read LEN/NLEN, then raw-copy LEN bytes.
      TINFL_SKIP_BITS(5, num_bits & 7);
      for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); }
      if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); }
      while ((counter) && (num_bits))
      {
        TINFL_GET_BITS(51, dist, 8);
        while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); }
        *pOut_buf_cur++ = (mz_uint8)dist;
        counter--;
      }
      while (counter)
      {
        size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); }
        while (pIn_buf_cur >= pIn_buf_end)
        {
          if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT)
          {
            TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT);
          }
          else
          {
            TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED);
          }
        }
        n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter);
        TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n;
      }
    }
    else if (r->m_type == 3)
    {
      // Block type 3 is reserved/invalid.
      TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED);
    }
    else
    {
      if (r->m_type == 1)
      {
        // Fixed Huffman block: synthesize the standard literal/length and distance code sizes.
        mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i;
        r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
        for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8;
      }
      else
      {
        // Dynamic Huffman block: read HLIT/HDIST/HCLEN, then the code-length code sizes.
        for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; }
        MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; }
        r->m_table_sizes[2] = 19;
      }
      // Build decode tables; for dynamic blocks the code-length table (2) first, then
      // distance (1) and literal/length (0).
      for ( ; (int)r->m_type >= 0; r->m_type--)
      {
        int tree_next, tree_cur; tinfl_huff_table *pTable;
        mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree);
        for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++;
        used_syms = 0, total = 0; next_code[0] = next_code[1] = 0;
        for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); }
        if ((65536 != total) && (used_syms > 1))
        {
          // Over/under-subscribed code (Kraft inequality violated) - corrupt stream.
          TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED);
        }
        for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index)
        {
          mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue;
          cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1);
          if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; }
          if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; }
          rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1);
          for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--)
          {
            tree_cur -= ((rev_code >>= 1) & 1);
            if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1];
          }
          tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index;
        }
        if (r->m_type == 2)
        {
          // Decode the run-length-encoded literal/length + distance code sizes.
          for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); )
          {
            mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; }
            if ((dist == 16) && (!counter))
            {
              TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED);
            }
            num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16];
            TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); counter += s;
          }
          if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter)
          {
            TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
          }
          TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
        }
      }
      // Main symbol loop: literals are emitted directly; symbols >= 256 are length codes
      // that start an LZ match copy (256 itself terminates the block).
      for ( ; ; )
      {
        mz_uint8 *pSrc;
        for ( ; ; )
        {
          if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2))
          {
            // Slow path near buffer edges: decode one symbol at a time with bounds checks.
            TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]);
            if (counter >= 256)
              break;
            while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); }
            *pOut_buf_cur++ = (mz_uint8)counter;
          }
          else
          {
            // Fast path: enough input/output slack to decode two literals per iteration.
            int sym2; mz_uint code_len;
#if TINFL_USE_64BIT_BITBUF
            if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; }
#else
            if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; }
#endif
            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
              code_len = sym2 >> 9;
            else
            {
              code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0);
            }
            counter = sym2; bit_buf >>= code_len; num_bits -= code_len;
            if (counter & 256)
              break;

#if !TINFL_USE_64BIT_BITBUF
            if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; }
#endif
            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
              code_len = sym2 >> 9;
            else
            {
              code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0);
            }
            bit_buf >>= code_len; num_bits -= code_len;

            pOut_buf_cur[0] = (mz_uint8)counter;
            if (sym2 & 256)
            {
              pOut_buf_cur++;
              counter = sym2;
              break;
            }
            pOut_buf_cur[1] = (mz_uint8)sym2;
            pOut_buf_cur += 2;
          }
        }
        if ((counter &= 511) == 256) break; // End-of-block symbol.

        num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257];
        if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; }

        TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]);
        num_extra = s_dist_extra[dist]; dist = s_dist_base[dist];
        if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; }

        dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
        if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
        {
          // Match reaches before the start of the output buffer - corrupt stream.
          TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED);
        }

        pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask);

        if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end)
        {
          // Copy would run off the output buffer: byte-at-a-time with wrap + suspend support.
          while (counter--)
          {
            while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); }
            *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask];
          }
          continue;
        }
#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
        else if ((counter >= 9) && (counter <= dist))
        {
          // Non-overlapping match: copy 8 bytes per iteration via two 32-bit loads/stores.
          const mz_uint8 *pSrc_end = pSrc + (counter & ~7);
          do
          {
            ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0];
            ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1];
            pOut_buf_cur += 8;
          } while ((pSrc += 8) < pSrc_end);
          if ((counter &= 7) < 3)
          {
            if (counter)
            {
              pOut_buf_cur[0] = pSrc[0];
              if (counter > 1)
                pOut_buf_cur[1] = pSrc[1];
              pOut_buf_cur += counter;
            }
            continue;
          }
        }
#endif
        do
        {
          pOut_buf_cur[0] = pSrc[0];
          pOut_buf_cur[1] = pSrc[1];
          pOut_buf_cur[2] = pSrc[2];
          pOut_buf_cur += 3; pSrc += 3;
        } while ((int)(counter -= 3) > 2);
        if ((int)counter > 0)
        {
          pOut_buf_cur[0] = pSrc[0];
          if ((int)counter > 1)
            pOut_buf_cur[1] = pSrc[1];
          pOut_buf_cur += counter;
        }
      }
    }
  } while (!(r->m_final & 1));
  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
  {
    // Read the big-endian adler-32 trailer that follows the final block.
    TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; }
  }
  TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE);
  TINFL_CR_FINISH

common_exit:
  // Persist coroutine locals and report the amount of input consumed / output produced.
  r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start;
  *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
  //if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0))
  if ((decomp_flags & TINFL_FLAG_COMPUTE_ADLER32) && (status >= 0))
  {
    // Incrementally fold this call's output into the running adler-32 (5552 is the largest
    // block size for which the sums cannot overflow 32 bits before the modulo).
    const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size;
    mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552;
    while (buf_len)
    {
      for (i = 0; i + 7 < block_len; i += 8, ptr += 8)
      {
        s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1;
        s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1;
      }
      for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1;
      s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552;
    }
    r->m_check_adler32 = (s2 << 16) + s1;
    if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32))
      status = TINFL_STATUS_ADLER32_MISMATCH;
  }
  return status;
}

// Higher level helper functions.
// Decompresses pSrc_buf (src_buf_len bytes) into a heap buffer that grows as needed.
// On success returns the MZ_MALLOC'd buffer (caller frees) and sets *pOut_len to its size;
// on failure returns NULL with *pOut_len == 0.
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
{
  tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0;
  *pOut_len = 0;
  tinfl_init(&decomp);
  for ( ; ; )
  {
    size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
    tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size,
      (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
    // The whole input is present, so NEEDS_MORE_INPUT also means the stream is truncated/corrupt.
    if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT))
    {
      MZ_FREE(pBuf); *pOut_len = 0; return NULL;
    }
    src_buf_ofs += src_buf_size;
    *pOut_len += dst_buf_size;
    if (status == TINFL_STATUS_DONE) break;
    // Grow geometrically (min 128 bytes) and resume where decompression left off.
    new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128;
    pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
    if (!pNew_buf)
    {
      MZ_FREE(pBuf); *pOut_len = 0; return NULL;
    }
    pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity;
  }
  return pBuf;
}

// Single-call decompression into a caller-provided buffer.
// Returns the number of bytes written, or TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on error.
size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
{
  tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp);
  status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
  return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len;
}

// Decompresses pIn_buf, delivering output in chunks through pPut_buf_func (which returns
// nonzero to continue). *pIn_buf_size is updated to the input consumed.
// Returns 1 on success, 0 on failure or if the callback aborted.
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
{
  int result = 0;
  tinfl_decompressor decomp;
  // Heap-allocate the LZ dictionary so large TINFL_LZ_DICT_SIZE doesn't blow the stack.
  mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0;
  if (!pDict)
    return TINFL_STATUS_FAILED;
  tinfl_init(&decomp);
  for ( ; ; )
  {
    size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
    tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
      (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
    in_buf_ofs += in_buf_size;
    if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
      break;
    if (status != TINFL_STATUS_HAS_MORE_OUTPUT)
    {
      result = (status == TINFL_STATUS_DONE);
      break;
    }
    dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1);
  }
  MZ_FREE(pDict);
  *pIn_buf_size = in_buf_ofs;
  return result;
}

// ------------------- Low-level Compression (independent from all decompression API's)

// Purposely making these tables static for faster init and thread safety.
// Deflate length symbol for each match length - 3 (indices 0..255 map lengths 3..258).
static const mz_uint16 s_tdefl_len_sym[256] = {
  257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272,
  273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276,
  277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,
  279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
  281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,
  282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,
  283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
  284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 };

// Number of extra bits to emit after each length symbol, indexed like s_tdefl_len_sym.
static const mz_uint8 s_tdefl_len_extra[256] = {
  0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 };

// Distance symbol for small match distances (dist - 1 in 0..511).
static const mz_uint8 s_tdefl_small_dist_sym[512] = {
  0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
  11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,
  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,
  14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,
  14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
  15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 };

// Extra-bit counts matching s_tdefl_small_dist_sym.
static const mz_uint8 s_tdefl_small_dist_extra[512] = {
  0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7 };

// Distance symbol for large distances, indexed by (dist - 1) >> 8 (i.e. high bits).
static const mz_uint8 s_tdefl_large_dist_sym[128] = {
  0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26,
  26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,
  28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 };

// Extra-bit counts matching s_tdefl_large_dist_sym.
static const mz_uint8 s_tdefl_large_dist_extra[128] = {
  0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
  12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 };

// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values.
+typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32* pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size. 
+enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; mz_uint32 total = 0; if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; 
i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) do { \ + mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \ + while (d->m_bits_in >= 8) { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ +} MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \ + if (rle_repeat_count < 3) { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } else { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ +} rle_repeat_count = 0; } } + +#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \ + if (rle_z_count < 3) { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ + } else if (rle_z_count <= 10) { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } else { \ + d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ +} 
rle_z_count = 0; } } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) { 
TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for ( ; i <= 255; ++i) *p++ = 9; + for ( ; i <= 279; ++i) *p++ = 7; + for ( ; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = 
d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? 
n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64*)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. + if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) ) + { + mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. 
+ else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } + } + else + { + mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) +static 
MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + if (!probe_len) + { + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE 
void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. 
+ mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 
*)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= 
TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. 
+ if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + 
// Simple lazy/greedy parsing state machine. + len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + } + else + { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; 
d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output buffer. + if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) + { + int n; + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? 
TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && 
(!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; + d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * 
TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; + do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); + pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; + } + memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; + *pOut_len = 
out_buf.m_size; return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif //MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. 
+void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) +{ + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. + static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER | (level <= 3 ? TDEFL_GREEDY_PARSING_FLAG : 0)); + for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? 
(h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + // write real header + *pLen_out = out_buf.m_size-41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52, + 0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0, + (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54}; + c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, *pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); +} + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +} // namespace buminiz + +#endif // MINIZ_HEADER_FILE_ONLY diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_ocl_kernels.h b/Source/ThirdParty/basis_universal/encoder/basisu_ocl_kernels.h new file mode 
100644 index 000000000..3993873bc --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_ocl_kernels.h @@ -0,0 +1,3784 @@ +unsigned char ocl_kernels_cl[] = { + 0x2f, 0x2f, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x5f, 0x44, + 0x45, 0x42, 0x55, 0x47, 0x0d, 0x0a, 0x0d, 0x0a, 0x23, 0x69, 0x66, 0x6e, + 0x64, 0x65, 0x66, 0x20, 0x4e, 0x55, 0x4c, 0x4c, 0x0d, 0x0a, 0x09, 0x23, + 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x4e, 0x55, 0x4c, 0x4c, 0x20, + 0x30, 0x4c, 0x0d, 0x0a, 0x23, 0x65, 0x6e, 0x64, 0x69, 0x66, 0x0d, 0x0a, + 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x63, 0x68, + 0x61, 0x72, 0x20, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, + 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x75, 0x63, 0x68, 0x61, + 0x72, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x68, + 0x6f, 0x72, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x3b, + 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x75, 0x73, + 0x68, 0x6f, 0x72, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, + 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, + 0x66, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x3b, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, + 0x66, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x69, 0x6e, 0x74, 0x36, 0x34, + 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, + 0x20, 0x75, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x36, + 0x34, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, + 0x64, 0x65, 0x66, 0x20, 0x75, 0x63, 0x68, 0x61, 0x72, 0x34, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 
0x65, 0x20, 0x55, 0x49, + 0x4e, 0x54, 0x33, 0x32, 0x5f, 0x4d, 0x41, 0x58, 0x20, 0x30, 0x78, 0x46, + 0x46, 0x46, 0x46, 0x46, 0x46, 0x46, 0x46, 0x55, 0x4c, 0x0d, 0x0a, 0x23, + 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x49, 0x4e, 0x54, 0x36, 0x34, + 0x5f, 0x4d, 0x41, 0x58, 0x20, 0x4c, 0x4f, 0x4e, 0x47, 0x5f, 0x4d, 0x41, + 0x58, 0x0d, 0x0a, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x55, + 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x20, 0x55, 0x4c, + 0x4f, 0x4e, 0x47, 0x5f, 0x4d, 0x41, 0x58, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, + 0x6e, 0x74, 0x20, 0x73, 0x71, 0x75, 0x61, 0x72, 0x65, 0x69, 0x28, 0x69, + 0x6e, 0x74, 0x20, 0x61, 0x29, 0x20, 0x7b, 0x20, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x61, 0x20, 0x2a, 0x20, 0x61, 0x3b, 0x20, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x23, 0x69, 0x66, 0x64, 0x65, 0x66, 0x20, 0x5f, 0x44, + 0x45, 0x42, 0x55, 0x47, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x6c, 0x69, 0x6e, + 0x65, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, + 0x6e, 0x61, 0x6c, 0x5f, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x62, + 0x6f, 0x6f, 0x6c, 0x20, 0x78, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x61, 0x6e, 0x74, 0x20, 0x63, 0x68, 0x61, 0x72, 0x20, 0x2a, 0x70, 0x4d, + 0x73, 0x67, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65, + 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x21, 0x78, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x70, 0x72, 0x69, + 0x6e, 0x74, 0x66, 0x28, 0x22, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, + 0x29, 0x20, 0x66, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x20, 0x6f, 0x6e, 0x20, + 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x25, 0x69, 0x3a, 0x20, 0x25, 0x73, 0x5c, + 0x6e, 0x22, 0x2c, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x2c, 0x20, 0x70, 0x4d, + 0x73, 0x67, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x23, + 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x73, 0x65, 0x72, + 0x74, 0x28, 0x78, 0x29, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, + 0x6c, 0x5f, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 
0x28, 0x78, 0x2c, 0x20, + 0x23, 0x78, 0x2c, 0x20, 0x5f, 0x5f, 0x4c, 0x49, 0x4e, 0x45, 0x5f, 0x5f, + 0x29, 0x0d, 0x0a, 0x23, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x23, + 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x73, 0x65, 0x72, + 0x74, 0x28, 0x78, 0x29, 0x0d, 0x0a, 0x23, 0x65, 0x6e, 0x64, 0x69, 0x66, + 0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, + 0x32, 0x35, 0x35, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x78, 0x29, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, + 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x78, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, + 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, + 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, + 0x74, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, + 0x6c, 0x61, 0x67, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x78, 0x2c, 0x20, 0x62, + 0x6f, 0x6f, 0x6c, 0x20, 0x2a, 0x70, 0x44, 0x69, 0x64, 0x5f, 0x63, 0x6c, + 0x61, 0x6d, 0x70, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x66, + 0x20, 0x28, 0x78, 0x20, 0x3c, 0x20, 0x30, 0x29, 0x0d, 0x0a, 0x09, 0x7b, + 0x0d, 0x0a, 0x09, 0x09, 0x2a, 0x70, 0x44, 0x69, 0x64, 0x5f, 0x63, 0x6c, + 0x61, 0x6d, 0x70, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x30, 0x3b, + 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x20, + 0x69, 0x66, 0x20, 0x28, 0x78, 0x20, 0x3e, 0x20, 0x32, 0x35, 0x35, 0x29, + 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x2a, 0x70, 0x44, 0x69, + 0x64, 0x5f, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x20, 0x3d, 0x20, 0x74, 0x72, + 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, + 0x6e, 0x20, 0x32, 0x35, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, + 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, 
0x78, 0x29, 0x3b, 0x0d, + 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, + 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, + 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, + 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x65, 0x6e, + 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, + 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, + 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, + 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x3b, 0x0d, 0x0a, 0x09, + 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, + 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, + 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x65, 0x72, 0x6d, + 0x73, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, + 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, + 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, + 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, + 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, + 0x64, 0x29, 0x29, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, + 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x31, 0x36, 0x5d, + 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x5b, 0x79, 0x2a, 0x34, 0x2b, 0x78, 0x5d, + 0x0d, 0x0a, 0x7d, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, + 0x6e, 0x63, 0x65, 0x28, 0x62, 0x6f, 0x6f, 0x6c, 
0x20, 0x70, 0x65, 0x72, + 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, 0x31, 0x2c, 0x20, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, + 0x32, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, + 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x29, 0x0d, + 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x2f, 0x2f, 0x20, 0x54, 0x68, + 0x69, 0x73, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x74, + 0x68, 0x65, 0x20, 0x43, 0x50, 0x55, 0x20, 0x63, 0x6f, 0x64, 0x65, 0x2c, + 0x20, 0x77, 0x68, 0x69, 0x63, 0x68, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, + 0x65, 0x66, 0x75, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x74, 0x65, 0x73, + 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, + 0x20, 0x64, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x78, 0x20, 0x2d, + 0x20, 0x65, 0x32, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, + 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x79, 0x20, + 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, + 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x7a, + 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x7a, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, + 0x6c, 0x20, 0x3d, 0x20, 0x64, 0x72, 0x20, 0x2a, 0x20, 0x31, 0x34, 0x20, + 0x2b, 0x20, 0x64, 0x67, 0x20, 0x2a, 0x20, 0x34, 0x35, 0x20, 0x2b, 0x20, + 0x64, 0x62, 0x20, 0x2a, 0x20, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, + 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x72, 0x20, + 0x3d, 0x20, 0x64, 0x72, 0x20, 0x2a, 0x20, 0x36, 0x34, 0x20, 0x2d, 0x20, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x6c, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x62, + 0x20, 0x3d, 0x20, 0x64, 0x62, 0x20, 0x2a, 0x20, 
0x36, 0x34, 0x20, 0x2d, + 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x6c, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x64, 0x20, 0x3d, + 0x20, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x5f, 0x6c, 0x20, 0x2a, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, + 0x5f, 0x6c, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x35, 0x55, 0x29, 0x20, 0x2b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x28, 0x28, 0x28, 0x28, 0x75, 0x69, 0x6e, + 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x72, 0x20, + 0x2a, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x72, 0x29, 0x20, + 0x3e, 0x3e, 0x20, 0x35, 0x55, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x36, 0x55, + 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x20, 0x2b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x28, 0x28, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, + 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x62, 0x20, 0x2a, 0x20, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x62, 0x29, 0x20, 0x3e, 0x3e, + 0x20, 0x35, 0x55, 0x29, 0x20, 0x2a, 0x20, 0x33, 0x55, 0x29, 0x20, 0x3e, + 0x3e, 0x20, 0x37, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, + 0x69, 0x66, 0x20, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, + 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, + 0x64, 0x61, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x31, 0x2e, 0x77, 0x20, 0x2d, + 0x20, 0x65, 0x32, 0x2e, 0x77, 0x29, 0x20, 0x3c, 0x3c, 0x20, 0x37, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x64, 0x20, 0x2b, 0x3d, 0x20, 0x28, + 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x64, 0x61, 0x20, 0x2a, 0x20, + 0x64, 0x61, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, + 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x69, 0x64, 0x3b, 0x0d, 0x0a, + 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x69, 0x66, + 0x20, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x09, 0x7b, + 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 
0x64, 0x72, 0x20, 0x3d, + 0x20, 0x65, 0x31, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x78, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x67, 0x20, + 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, + 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, + 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x65, 0x32, + 0x2e, 0x7a, 0x3b, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, + 0x64, 0x61, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x77, 0x20, 0x2d, 0x20, + 0x65, 0x32, 0x2e, 0x77, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, + 0x75, 0x72, 0x6e, 0x20, 0x64, 0x72, 0x20, 0x2a, 0x20, 0x64, 0x72, 0x20, + 0x2b, 0x20, 0x64, 0x67, 0x20, 0x2a, 0x20, 0x64, 0x67, 0x20, 0x2b, 0x20, + 0x64, 0x62, 0x20, 0x2a, 0x20, 0x64, 0x62, 0x20, 0x2b, 0x20, 0x64, 0x61, + 0x20, 0x2a, 0x20, 0x64, 0x61, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, + 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, + 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x31, + 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x65, + 0x31, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x79, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, 0x20, + 0x65, 0x31, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x7a, 0x3b, + 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, + 0x64, 0x72, 0x20, 0x2a, 0x20, 0x64, 0x72, 0x20, 0x2b, 0x20, 0x64, 0x67, + 0x20, 0x2a, 0x20, 0x64, 0x67, 0x20, 0x2b, 0x20, 0x64, 0x62, 0x20, 0x2a, + 0x20, 0x64, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, + 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, + 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, + 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x65, 
0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x69, 0x67, 0x20, 0x65, 0x6e, 0x64, + 0x69, 0x61, 0x6e, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x3a, 0x0d, + 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x69, 0x74, 0x20, 0x6f, 0x66, 0x73, + 0x3a, 0x20, 0x20, 0x35, 0x36, 0x20, 0x20, 0x34, 0x38, 0x20, 0x20, 0x34, + 0x30, 0x20, 0x20, 0x33, 0x32, 0x20, 0x20, 0x32, 0x34, 0x20, 0x20, 0x31, + 0x36, 0x20, 0x20, 0x20, 0x38, 0x20, 0x20, 0x20, 0x30, 0x0d, 0x0a, 0x09, + 0x2f, 0x2f, 0x20, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6f, 0x66, 0x73, 0x3a, + 0x20, 0x62, 0x30, 0x2c, 0x20, 0x62, 0x31, 0x2c, 0x20, 0x62, 0x32, 0x2c, + 0x20, 0x62, 0x33, 0x2c, 0x20, 0x62, 0x34, 0x2c, 0x20, 0x62, 0x35, 0x2c, + 0x20, 0x62, 0x36, 0x2c, 0x20, 0x62, 0x37, 0x20, 0x0d, 0x0a, 0x09, 0x75, + 0x6e, 0x69, 0x6f, 0x6e, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x75, + 0x69, 0x6e, 0x74, 0x36, 0x34, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, + 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, + 0x73, 0x5b, 0x38, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x65, 0x6e, 0x75, 0x6d, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x73, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, + 0x79, 0x74, 0x65, 0x73, 0x50, 0x65, 0x72, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x3d, 0x20, 0x38, 0x55, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x32, 0x55, 0x2c, 0x0d, 0x0a, + 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x31, + 0x55, 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 
0x43, 0x31, 0x53, 0x65, + 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, + 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x4d, 0x61, 0x73, 0x6b, 0x20, 0x3d, 0x20, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x73, 0x20, 0x2d, 0x20, 0x31, 0x55, 0x2c, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x53, 0x68, 0x69, 0x66, 0x74, 0x20, 0x3d, 0x20, 0x32, 0x55, + 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x53, 0x69, 0x7a, 0x65, 0x20, 0x3d, 0x20, 0x31, 0x55, 0x20, + 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x53, 0x68, 0x69, 0x66, 0x74, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x4c, 0x53, 0x42, 0x53, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x49, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x42, + 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x30, + 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x4d, 0x53, 0x42, + 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x49, 0x6e, 0x64, 0x69, + 0x63, 0x65, 0x73, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, + 0x20, 0x3d, 0x20, 0x31, 0x36, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x46, 0x6c, 0x69, 0x70, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, 0x32, 0x2c, 0x0d, + 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x69, 0x66, 0x66, 0x42, + 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, + 0x33, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, + 0x72, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x33, + 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x49, 0x6e, 0x74, + 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 
0x65, 0x72, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, + 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, + 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x52, 0x69, + 0x67, 0x68, 0x74, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, + 0x66, 0x69, 0x65, 0x72, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x69, 0x74, + 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, 0x34, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x4c, 0x65, 0x66, 0x74, + 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, + 0x72, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, + 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, 0x37, 0x2c, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x42, 0x61, 0x73, 0x65, 0x2b, 0x44, 0x65, + 0x6c, 0x74, 0x61, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, + 0x20, 0x28, 0x35, 0x20, 0x62, 0x69, 0x74, 0x20, 0x62, 0x61, 0x73, 0x65, + 0x73, 0x2c, 0x20, 0x33, 0x20, 0x62, 0x69, 0x74, 0x20, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x29, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, + 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, + 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x35, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, + 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4d, 0x61, 0x78, + 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, + 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, + 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, + 0x6d, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x33, 0x2c, 0x0d, 0x0a, + 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 
0x6c, 0x74, 0x61, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x20, 0x3d, 0x20, 0x31, + 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, + 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, + 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4d, 0x61, 0x78, 0x20, 0x3d, 0x20, 0x31, + 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, + 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, + 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x35, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x20, 0x3d, 0x20, 0x35, 0x39, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x47, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, + 0x3d, 0x20, 0x35, 0x31, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x42, + 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, + 0x34, 0x33, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, + 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, + 0x20, 0x35, 0x36, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x47, + 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, + 0x34, 0x38, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, + 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x42, 0x42, + 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, + 0x30, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x2f, 
0x2f, 0x20, 0x41, 0x62, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x65, 0x20, 0x28, 0x6e, 0x6f, 0x6e, 0x2d, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x29, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, + 0x69, 0x6e, 0x67, 0x20, 0x28, 0x74, 0x77, 0x6f, 0x20, 0x34, 0x2d, 0x62, + 0x69, 0x74, 0x20, 0x70, 0x65, 0x72, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x6f, + 0x6e, 0x65, 0x6e, 0x74, 0x20, 0x62, 0x61, 0x73, 0x65, 0x73, 0x29, 0x0d, + 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, + 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, + 0x74, 0x73, 0x20, 0x3d, 0x20, 0x34, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, + 0x6f, 0x6d, 0x70, 0x4d, 0x61, 0x78, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, + 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, + 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, + 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x31, + 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, + 0x36, 0x30, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, + 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x47, 0x31, 0x42, 0x69, + 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x35, 0x32, + 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, + 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x42, 0x31, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, 0x34, 0x2c, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, + 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x32, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x35, 0x36, 0x2c, 0x0d, + 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, + 0x6c, 0x6f, 0x72, 0x34, 0x47, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, + 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, 0x38, 
0x2c, 0x0d, 0x0a, 0x09, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x34, 0x42, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x20, 0x3d, 0x20, 0x34, 0x30, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, + 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x20, 0x3d, 0x20, 0x2d, 0x34, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x20, 0x3d, 0x20, + 0x33, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x44, 0x65, + 0x6c, 0x74, 0x61, 0x33, 0x3a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x30, + 0x20, 0x20, 0x20, 0x31, 0x20, 0x20, 0x20, 0x32, 0x20, 0x20, 0x20, 0x33, + 0x20, 0x20, 0x20, 0x34, 0x20, 0x20, 0x20, 0x35, 0x20, 0x20, 0x20, 0x36, + 0x20, 0x20, 0x20, 0x37, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x30, 0x30, + 0x30, 0x20, 0x30, 0x30, 0x31, 0x20, 0x30, 0x31, 0x30, 0x20, 0x30, 0x31, + 0x31, 0x20, 0x31, 0x30, 0x30, 0x20, 0x31, 0x30, 0x31, 0x20, 0x31, 0x31, + 0x30, 0x20, 0x31, 0x31, 0x31, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x30, + 0x20, 0x20, 0x20, 0x31, 0x20, 0x20, 0x20, 0x32, 0x20, 0x20, 0x20, 0x33, + 0x20, 0x20, 0x20, 0x2d, 0x34, 0x20, 0x20, 0x2d, 0x33, 0x20, 0x20, 0x2d, + 0x32, 0x20, 0x20, 0x2d, 0x31, 0x0d, 0x0a, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x42, 0x41, 0x53, + 0x49, 0x53, 0x55, 0x5f, 0x45, 0x54, 0x43, 0x31, 0x5f, 0x43, 0x4c, 0x55, + 0x53, 0x54, 0x45, 0x52, 0x5f, 0x46, 0x49, 0x54, 0x5f, 0x4f, 0x52, 0x44, + 0x45, 0x52, 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x53, 0x49, 0x5a, + 0x45, 0x20, 0x28, 0x31, 0x36, 0x35, 0x29, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x20, 0x7b, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x6d, + 0x5f, 0x76, 0x5b, 0x34, 0x5d, 0x3b, 0x20, 0x7d, 0x20, 0x67, 0x5f, 0x63, + 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, 
0x69, 0x74, 0x5f, 0x6f, + 0x72, 0x64, 0x65, 0x72, 0x5f, 0x74, 0x61, 0x62, 0x5b, 0x42, 0x41, 0x53, + 0x49, 0x53, 0x55, 0x5f, 0x45, 0x54, 0x43, 0x31, 0x5f, 0x43, 0x4c, 0x55, + 0x53, 0x54, 0x45, 0x52, 0x5f, 0x46, 0x49, 0x54, 0x5f, 0x4f, 0x52, 0x44, + 0x45, 0x52, 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x53, 0x49, 0x5a, + 0x45, 0x5d, 0x20, 0x3d, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x38, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, + 0x35, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x2c, + 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, + 0x2c, 0x20, 0x37, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x37, 0x2c, 0x20, + 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x38, 0x2c, + 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x37, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x34, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x37, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x37, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x36, 0x2c, 
0x20, 0x30, 0x2c, 0x20, + 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, + 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, + 0x20, 0x33, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, + 0x20, 0x35, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, + 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, + 0x36, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, + 0x38, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 
0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x31, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, + 0x36, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, + 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, + 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, + 0x34, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x32, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x37, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x34, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, + 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x35, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, 
0x20, 0x30, 0x2c, 0x20, + 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, + 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x33, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, + 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, + 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, + 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x34, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, + 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, + 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, + 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 
0x20, 0x7b, 0x20, 0x35, + 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, + 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, + 0x32, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x32, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x37, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, + 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 
0x20, 0x33, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, + 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x37, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x34, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, + 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, + 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x37, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, + 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x38, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, + 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 
0x20, 0x7b, 0x20, 0x34, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x32, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x34, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, + 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, + 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 
0x20, 0x32, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x36, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, + 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, + 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, + 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, + 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x33, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, + 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x34, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 
0x31, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, + 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x33, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, + 0x20, 0x34, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, + 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 
0x20, 0x35, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x37, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, + 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, + 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x37, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x37, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x31, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x31, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 
0x30, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, + 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, + 0x0d, 0x0a, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x61, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x67, 0x5f, 0x65, + 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x63, 0x45, 0x54, 0x43, 0x31, 0x49, 0x6e, + 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x73, 0x5d, 0x5b, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x5d, 0x20, 0x3d, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x7b, + 0x20, 0x2d, 0x38, 0x2c, 0x20, 0x20, 0x2d, 0x32, 0x2c, 0x20, 0x20, 0x20, + 0x32, 0x2c, 0x20, 0x20, 0x20, 0x38, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, + 0x2d, 0x31, 0x37, 0x2c, 0x20, 0x20, 0x2d, 0x35, 0x2c, 0x20, 0x20, 0x35, + 0x2c, 0x20, 0x20, 0x31, 0x37, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x2d, + 0x32, 0x39, 0x2c, 0x20, 0x20, 0x2d, 0x39, 0x2c, 0x20, 0x20, 0x20, 0x39, + 0x2c, 0x20, 0x20, 0x32, 0x39, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x20, + 0x2d, 0x34, 0x32, 0x2c, 0x20, 0x2d, 0x31, 0x33, 0x2c, 0x20, 0x31, 0x33, + 0x2c, 0x20, 0x20, 0x34, 0x32, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, + 0x20, 0x2d, 0x36, 0x30, 0x2c, 0x20, 0x2d, 0x31, 0x38, 0x2c, 0x20, 0x31, + 0x38, 0x2c, 0x20, 0x20, 0x36, 0x30, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, + 0x2d, 0x38, 0x30, 0x2c, 0x20, 0x2d, 0x32, 0x34, 0x2c, 0x20, 0x32, 0x34, + 0x2c, 0x20, 0x20, 0x38, 0x30, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x2d, + 0x31, 0x30, 0x36, 0x2c, 0x20, 0x2d, 0x33, 0x33, 0x2c, 0x20, 0x33, 0x33, + 0x2c, 0x20, 0x31, 0x30, 0x36, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x2d, + 0x31, 0x38, 0x33, 0x2c, 0x20, 0x2d, 0x34, 0x37, 
0x2c, 0x20, 0x34, 0x37, + 0x2c, 0x20, 0x31, 0x38, 0x33, 0x20, 0x7d, 0x0d, 0x0a, 0x7d, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x67, 0x5f, 0x65, 0x74, + 0x63, 0x31, 0x5f, 0x74, 0x6f, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5b, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x73, 0x5d, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x32, 0x2c, + 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x3b, 0x0d, + 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x67, 0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, + 0x6f, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5b, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x5d, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, + 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, + 0x6f, 0x66, 0x73, 0x20, 0x2b, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x3c, + 0x3d, 0x20, 0x36, 0x34, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, + 0x73, 0x65, 0x72, 0x74, 0x28, 0x6e, 0x75, 0x6d, 0x20, 0x26, 0x26, 0x20, + 0x28, 0x6e, 0x75, 0x6d, 0x20, 0x3c, 0x3d, 0x20, 
0x38, 0x55, 0x29, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, + 0x6f, 0x66, 0x73, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x20, 0x3d, 0x3d, + 0x20, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, 0x2b, 0x20, 0x6e, 0x75, 0x6d, + 0x20, 0x2d, 0x20, 0x31, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, + 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x37, 0x20, 0x2d, 0x20, 0x28, 0x6f, + 0x66, 0x73, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, + 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x6f, 0x66, 0x73, 0x20, 0x26, 0x20, + 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, + 0x28, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, + 0x62, 0x79, 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x5d, 0x20, 0x3e, 0x3e, + 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, + 0x73, 0x29, 0x20, 0x26, 0x20, 0x28, 0x28, 0x31, 0x20, 0x3c, 0x3c, 0x20, + 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x2d, 0x20, 0x31, 0x29, 0x3b, 0x0d, 0x0a, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, + 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, + 0x75, 0x6d, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x62, 0x69, 0x74, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, + 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, + 0x2b, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x3c, 
0x3d, 0x20, 0x36, 0x34, + 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, + 0x28, 0x6e, 0x75, 0x6d, 0x20, 0x26, 0x26, 0x20, 0x28, 0x6e, 0x75, 0x6d, + 0x20, 0x3c, 0x20, 0x33, 0x32, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, + 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x20, 0x3d, 0x3d, 0x20, 0x28, 0x28, 0x6f, + 0x66, 0x73, 0x20, 0x2b, 0x20, 0x6e, 0x75, 0x6d, 0x20, 0x2d, 0x20, 0x31, + 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x62, 0x69, 0x74, 0x73, 0x20, + 0x3c, 0x20, 0x28, 0x31, 0x55, 0x20, 0x3c, 0x3c, 0x20, 0x6e, 0x75, 0x6d, + 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x37, 0x20, 0x2d, 0x20, + 0x28, 0x6f, 0x66, 0x73, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, + 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x6f, 0x66, 0x73, 0x20, + 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x61, + 0x73, 0x6b, 0x20, 0x3d, 0x20, 0x28, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x6e, + 0x75, 0x6d, 0x29, 0x20, 0x2d, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x70, + 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x62, 0x79, + 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, + 0x28, 0x6d, 0x61, 0x73, 0x6b, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x5b, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x5d, 0x20, 0x7c, + 0x3d, 0x20, 0x28, 0x62, 0x69, 0x74, 0x73, 0x20, 
0x3c, 0x3c, 0x20, 0x62, + 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, + 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, + 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, + 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x70, 0x2d, + 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, + 0x26, 0x20, 0x31, 0x29, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x62, + 0x6f, 0x6f, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x70, 0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, + 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x31, 0x3b, 0x0d, + 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x5b, 0x33, 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x38, 0x5f, 0x74, 0x29, 0x28, 0x66, 0x6c, 0x69, 0x70, 0x29, 0x3b, 0x0d, + 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, + 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x20, 0x2a, 0x70, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, + 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x70, 0x2d, 0x3e, 0x6d, + 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x26, 0x20, + 0x32, 0x29, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x3b, 
0x0d, 0x0a, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, + 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x62, 0x6f, 0x6f, + 0x6c, 0x20, 0x64, 0x69, 0x66, 0x66, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, + 0x33, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x32, 0x3b, 0x0d, 0x0a, 0x09, + 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, + 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x29, 0x28, 0x64, 0x69, 0x66, 0x66, 0x29, 0x20, 0x3c, + 0x3c, 0x20, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x2f, 0x2f, 0x20, 0x52, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x73, 0x20, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x6d, 0x6f, 0x64, + 0x69, 0x66, 0x69, 0x65, 0x72, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, + 0x28, 0x30, 0x2d, 0x37, 0x29, 0x20, 0x75, 0x73, 0x65, 0x64, 0x20, 0x62, + 0x79, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x73, + 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x2e, 0x0d, + 0x0a, 0x2f, 0x2f, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x69, 0x64, 0x3d, 0x30, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x2f, 0x74, + 0x6f, 0x70, 0x20, 0x28, 0x43, 0x57, 0x20, 0x31, 0x29, 0x2c, 0x20, 0x31, + 0x3d, 0x72, 0x69, 0x67, 0x68, 0x74, 0x2f, 0x62, 0x6f, 0x74, 0x74, 0x6f, + 0x6d, 0x20, 0x28, 0x43, 0x57, 0x20, 0x32, 0x29, 0x0d, 0x0a, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x28, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, 0x6e, 
0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, + 0x64, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, + 0x65, 0x72, 0x74, 0x28, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x69, 0x64, 0x20, 0x3c, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x73, 0x75, 0x62, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x20, 0x3f, 0x20, 0x32, + 0x20, 0x3a, 0x20, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x28, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, + 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x3e, 0x3e, 0x20, 0x6f, 0x66, 0x73, + 0x29, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x2f, 0x2f, 0x20, 0x53, 0x65, 0x74, 0x73, 0x20, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x6d, 0x6f, 0x64, 0x69, 0x66, + 0x69, 0x65, 0x72, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x28, 0x30, + 0x2d, 0x37, 0x29, 0x20, 0x75, 0x73, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, + 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x73, 0x75, 0x62, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x20, 0x28, 0x30, 0x20, + 0x6f, 0x72, 0x20, 0x31, 0x29, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, + 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x74, + 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, + 0x74, 0x28, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, + 0x64, 0x20, 0x3c, 0x20, 0x32, 0x29, 0x3b, 0x0d, 
0x0a, 0x09, 0x61, 0x73, + 0x73, 0x65, 0x72, 0x74, 0x28, 0x74, 0x20, 0x3c, 0x20, 0x38, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, + 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x20, + 0x3f, 0x20, 0x32, 0x20, 0x3a, 0x20, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x70, + 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, + 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x28, 0x37, 0x20, 0x3c, 0x3c, 0x20, 0x6f, + 0x66, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, + 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x7c, 0x3d, 0x20, + 0x28, 0x74, 0x20, 0x3c, 0x3c, 0x20, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, + 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x73, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x74, 0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x28, 0x70, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x74, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, + 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x28, 0x70, 0x2c, 0x20, 0x31, 0x2c, + 0x20, 0x74, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x72, 0x61, + 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 
0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x29, 0x20, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, + 0x28, 0x28, 0x78, 0x20, 0x7c, 0x20, 0x79, 0x29, 0x20, 0x3c, 0x20, 0x34, + 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, + 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x78, 0x20, + 0x2a, 0x20, 0x34, 0x20, 0x2b, 0x20, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, + 0x66, 0x73, 0x20, 0x3d, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, + 0x2a, 0x70, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x37, 0x20, + 0x2d, 0x20, 0x28, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x6c, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x5b, 0x30, + 0x5d, 0x20, 0x3e, 0x3e, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, + 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x20, 0x26, 0x20, 0x31, 0x3b, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x28, + 0x70, 0x5b, 0x2d, 0x32, 0x5d, 0x20, 0x3e, 0x3e, 0x20, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x20, 0x26, + 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 
0x6e, 0x73, 0x74, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x76, 0x61, 0x6c, + 0x20, 0x3d, 0x20, 0x6c, 0x73, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x6d, 0x73, + 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x76, 0x61, 0x6c, 0x3b, + 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x52, 0x65, + 0x74, 0x75, 0x72, 0x6e, 0x65, 0x64, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x72, 0x61, + 0x6e, 0x67, 0x65, 0x73, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x30, 0x2d, + 0x33, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x64, + 0x69, 0x72, 0x65, 0x63, 0x74, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, + 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, + 0x2e, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, + 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x29, 0x20, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, + 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x74, 0x6f, 0x5f, 0x73, + 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x5b, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x67, 0x65, 0x74, 0x5f, 0x72, 0x61, 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x78, 0x2c, 0x20, 0x79, 0x29, 0x5d, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x53, 0x65, 
0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x20, 0x22, 0x76, 0x61, 0x6c, 0x22, 0x20, 0x72, 0x61, 0x6e, + 0x67, 0x65, 0x73, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x30, 0x2d, 0x33, + 0x20, 0x61, 0x6e, 0x64, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x64, 0x69, + 0x72, 0x65, 0x63, 0x74, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x69, + 0x6e, 0x74, 0x6f, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x2e, + 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x2c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x2c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x76, 0x61, + 0x6c, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, + 0x72, 0x74, 0x28, 0x28, 0x78, 0x20, 0x7c, 0x20, 0x79, 0x20, 0x7c, 0x20, + 0x76, 0x61, 0x6c, 0x29, 0x20, 0x3c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x20, 0x3d, 0x20, 0x78, 0x20, 0x2a, 0x20, 0x34, 0x20, 0x2b, 0x20, + 0x79, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x38, + 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x5b, 0x37, 0x20, 0x2d, 0x20, 0x28, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x5d, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, + 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x20, 
0x3d, 0x20, 0x62, 0x69, + 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x26, 0x20, 0x37, 0x3b, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x61, 0x73, 0x6b, 0x20, 0x3d, + 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, + 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x76, 0x61, 0x6c, 0x20, + 0x3d, 0x20, 0x67, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x6f, 0x5f, 0x65, 0x74, + 0x63, 0x31, 0x5b, 0x76, 0x61, 0x6c, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6c, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x65, + 0x74, 0x63, 0x31, 0x5f, 0x76, 0x61, 0x6c, 0x20, 0x26, 0x20, 0x31, 0x3b, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x73, 0x62, 0x20, 0x3d, 0x20, + 0x65, 0x74, 0x63, 0x31, 0x5f, 0x76, 0x61, 0x6c, 0x20, 0x3e, 0x3e, 0x20, + 0x31, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x5b, 0x30, 0x5d, 0x20, + 0x26, 0x3d, 0x20, 0x7e, 0x6d, 0x61, 0x73, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, + 0x70, 0x5b, 0x30, 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x6c, 0x73, 0x62, + 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, + 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, + 0x5b, 0x2d, 0x32, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x6d, 0x61, 0x73, + 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x5b, 0x2d, 0x32, 0x5d, 0x20, 0x7c, + 0x3d, 0x20, 0x28, 0x6d, 0x73, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, + 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x3b, + 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 
0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x64, 0x78, 0x2c, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x63, 0x29, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x69, 0x64, 0x78, 0x29, + 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, + 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, + 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x32, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x28, 0x63, + 0x20, 0x3e, 0x3e, 0x20, 0x38, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, + 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x34, 0x47, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, + 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, + 0x20, 0x34, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, + 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, + 0x42, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, + 0x20, 0x34, 0x2c, 0x20, 0x63, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 
0x6c, 0x73, 0x65, 0x0d, + 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, + 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x28, 0x63, 0x20, + 0x3e, 0x3e, 0x20, 0x38, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, + 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x34, 0x47, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, + 0x34, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, + 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, + 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x42, + 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, + 0x34, 0x2c, 0x20, 0x63, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, + 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, + 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 
0x64, 0x78, 0x29, 0x20, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x72, 0x2c, 0x20, 0x67, 0x2c, 0x20, 0x62, 0x3b, 0x0d, + 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x69, 0x64, 0x78, 0x29, 0x0d, 0x0a, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, + 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, + 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x32, 0x42, 0x69, + 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, + 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, + 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x47, 0x32, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, + 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, + 0x6c, 0x6f, 0x72, 0x34, 0x42, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, + 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, + 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x31, 0x42, 
0x69, 0x74, 0x4f, 0x66, + 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, + 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x34, 0x47, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, + 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, + 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, + 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, + 0x34, 0x42, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, + 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, + 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, + 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, + 0x20, 0x3c, 0x3c, 0x20, 0x38, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, + 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, + 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, + 0x5f, 0x74, 0x20, 0x63, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 
0x72, 0x35, 0x52, 0x42, + 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x2c, + 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x31, 0x30, 0x29, 0x20, 0x26, + 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, + 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, + 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x47, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x28, 0x63, + 0x20, 0x3e, 0x3e, 0x20, 0x35, 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, + 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x35, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x63, 0x20, 0x26, 0x20, 0x33, 0x31, + 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, + 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, + 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, + 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, + 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 
0x52, 0x42, 0x69, 0x74, + 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, + 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, + 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x47, 0x42, 0x69, 0x74, + 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, + 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, + 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x42, 0x42, 0x69, 0x74, + 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, + 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x35, 0x55, 0x29, 0x20, 0x7c, 0x20, + 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x31, 0x30, 0x55, 0x29, 0x29, 0x3b, + 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x63, 0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 
0x65, 0x5f, 0x62, 0x69, + 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x33, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, + 0x36, 0x29, 0x20, 0x26, 0x20, 0x37, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x47, + 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x20, 0x26, + 0x20, 0x37, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, + 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x42, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x63, 0x20, + 0x26, 0x20, 0x37, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x64, + 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, + 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x20, + 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 
0x5f, 0x62, 0x69, 0x74, + 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x33, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, + 0x2c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x67, + 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, + 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x33, 0x47, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x2c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, + 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, + 0x6c, 0x6f, 0x72, 0x33, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, + 0x65, 0x74, 0x2c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, + 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, + 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, + 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, + 0x3c, 0x20, 0x36, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x2a, + 0x70, 0x52, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x2a, 0x70, 0x47, 0x2c, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x2a, 0x70, 0x42, 
0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, + 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x28, + 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, + 0x33, 0x20, 0x3e, 0x3e, 0x20, 0x36, 0x29, 0x20, 0x26, 0x20, 0x37, 0x3b, + 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x28, + 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, + 0x33, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x20, 0x26, 0x20, 0x37, 0x3b, + 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x70, + 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, + 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, + 0x72, 0x20, 0x3e, 0x3d, 0x20, 0x34, 0x29, 0x20, 0x72, 0x20, 0x2d, 0x3d, + 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x67, 0x20, + 0x3e, 0x3d, 0x20, 0x34, 0x29, 0x20, 0x67, 0x20, 0x2d, 0x3d, 0x20, 0x38, + 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x62, 0x20, 0x3e, 0x3d, + 0x20, 0x34, 0x29, 0x20, 0x62, 0x20, 0x2d, 0x3d, 0x20, 0x38, 0x3b, 0x0d, + 0x0a, 0x09, 0x2a, 0x70, 0x52, 0x20, 0x3d, 0x20, 0x72, 0x3b, 0x0d, 0x0a, + 0x09, 0x2a, 0x70, 0x47, 0x20, 0x3d, 0x20, 0x67, 0x3b, 0x0d, 0x0a, 0x09, + 0x2a, 0x70, 0x42, 0x20, 0x3d, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x64, 0x65, 0x6c, 0x74, + 0x61, 0x33, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, + 0x61, 0x20, 0x2a, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 
0x20, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x2c, 0x20, + 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, + 0x74, 0x20, 0x64, 0x72, 0x2c, 0x20, 0x64, 0x67, 0x2c, 0x20, 0x64, 0x62, + 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x33, 0x28, 0x26, 0x64, 0x72, 0x2c, 0x20, 0x26, 0x64, 0x67, + 0x2c, 0x20, 0x26, 0x64, 0x62, 0x2c, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, + 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x29, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x28, + 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x29, 0x20, 0x2b, 0x20, 0x64, + 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x67, 0x20, 0x3d, + 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, 0x35, 0x55, 0x29, 0x20, + 0x26, 0x20, 0x33, 0x31, 0x55, 0x29, 0x20, 0x2b, 0x20, 0x64, 0x67, 0x3b, + 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x28, + 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, 0x31, 0x30, 0x55, 0x29, 0x20, 0x26, + 0x20, 0x33, 0x31, 0x55, 0x29, 0x20, 0x2b, 0x20, 0x64, 0x72, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x75, 0x63, + 0x63, 0x65, 0x73, 0x73, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, + 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x29, 0x28, 0x72, 0x20, 0x7c, 0x20, 0x67, 0x20, + 0x7c, 0x20, 0x62, 0x29, 0x20, 0x3e, 0x20, 0x33, 0x31, 0x55, 0x29, 0x0d, + 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x73, 
0x75, 0x63, 0x63, 0x65, + 0x73, 0x73, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, + 0x28, 0x72, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, + 0x28, 0x67, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, + 0x28, 0x62, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, 0x3c, 0x3c, + 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x20, 0x3e, 0x3e, + 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, + 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, + 0x20, 0x28, 0x67, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, + 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3e, 0x3e, + 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x2a, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x3d, + 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x29, 0x28, 0x72, 0x2c, 0x20, 0x67, 0x2c, 0x20, 0x62, 0x2c, 0x20, 0x6d, + 0x69, 0x6e, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x2c, 0x20, 0x32, 0x35, + 0x35, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x3b, 0x0d, + 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x75, 0x69, 0x6e, 
0x74, 0x31, 0x36, 0x5f, + 0x74, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x35, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, + 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x62, 0x20, 0x3d, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x3b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x67, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, 0x35, 0x55, + 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, + 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, 0x31, 0x30, 0x55, 0x29, 0x20, 0x26, + 0x20, 0x33, 0x31, 0x55, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, + 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x09, + 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, + 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x20, + 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, + 0x20, 0x3d, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, + 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, + 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, + 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, + 0x72, 0x2c, 0x20, 0x67, 0x2c, 0x20, 0x62, 0x2c, 
0x20, 0x6d, 0x69, 0x6e, + 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x55, + 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x75, 0x69, 0x6e, + 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x2c, 0x20, 0x62, 0x6f, 0x6f, + 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x70, 0x61, 0x63, 0x6b, + 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x20, 0x26, 0x20, + 0x31, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x20, 0x3e, + 0x3e, 0x20, 0x34, 0x55, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x55, 0x3b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x72, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x20, 0x3e, 0x3e, 0x20, 0x38, 0x55, + 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, + 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, + 0x28, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, + 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x67, + 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x67, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x3c, + 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, 
0x72, 0x3b, 0x0d, 0x0a, + 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, + 0x6e, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, + 0x61, 0x29, 0x28, 0x72, 0x2c, 0x20, 0x67, 0x2c, 0x20, 0x62, 0x2c, 0x20, + 0x6d, 0x69, 0x6e, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x2c, 0x20, 0x32, + 0x35, 0x35, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x2f, 0x2f, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x20, 0x69, 0x66, + 0x20, 0x64, 0x69, 0x64, 0x6e, 0x27, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x6d, + 0x70, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x20, 0x69, 0x66, 0x20, 0x61, + 0x6e, 0x79, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, + 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x65, 0x64, 0x0d, 0x0a, 0x62, 0x6f, + 0x6f, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x2a, 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x20, 0x0d, + 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, + 0x67, 0x62, 0x61, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, + 0x66, 0x20, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, + 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x29, 0x0d, 0x0a, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x75, + 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x65, 0x74, 
0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, + 0x28, 0x26, 0x62, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x35, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x29, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x29, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x20, 0x32, 0x35, + 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, + 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x2c, + 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, + 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, + 0x09, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x34, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, + 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 
0x74, 0x61, 0x6e, 0x74, + 0x20, 0x69, 0x6e, 0x74, 0x2a, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, + 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, + 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, + 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x64, 0x63, 0x20, 0x3d, 0x20, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, + 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, + 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, + 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, + 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x5b, 0x30, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, + 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, + 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, + 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x30, 0x5d, 0x2c, 0x20, + 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, + 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x7a, 0x20, + 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x5b, 0x30, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, + 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, + 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, + 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 
0x6d, 0x70, 0x32, 0x35, + 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, + 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, + 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, + 0x67, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x2c, + 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, + 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x7a, + 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, + 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, + 0x32, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, + 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x78, 0x20, + 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, + 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, + 0x61, 0x67, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, + 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, + 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, + 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, + 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, + 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, + 0x5b, 0x33, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 
0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, + 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x78, + 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, + 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, + 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, + 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, + 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, + 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, + 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x26, 0x64, + 0x63, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x64, 0x63, 0x3b, 0x0d, 0x0a, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x67, 0x65, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x73, 0x35, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, + 0x62, 0x61, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, + 0x2a, 0x70, 0x42, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2c, + 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x20, 0x2f, 0x2a, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x20, 0x2a, 0x2f, + 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x2a, + 0x70, 0x42, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 
0x6c, 0x6f, 0x72, 0x35, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x21, 0x73, + 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, + 0x09, 0x09, 0x62, 0x2e, 0x78, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x2e, 0x78, + 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x2e, + 0x78, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x62, 0x2e, 0x79, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x3c, + 0x3c, 0x20, 0x33, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x2e, 0x79, 0x20, + 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x2e, + 0x7a, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x3c, 0x3c, 0x20, + 0x33, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x3e, 0x3e, + 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x69, 0x6e, + 0x74, 0x2a, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, + 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x73, 0x5b, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, + 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, + 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, + 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, + 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x30, 0x5d, 0x29, 0x2c, + 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, + 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x30, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, + 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, + 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 
0x74, 0x61, 0x62, 0x6c, + 0x65, 0x5b, 0x30, 0x5d, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, + 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x78, 0x20, + 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, + 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, + 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, + 0x31, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, + 0x35, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x29, + 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, + 0x32, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, + 0x35, 0x35, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, + 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, + 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, + 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, + 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x29, 0x2c, 0x20, + 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x7a, + 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x3d, 0x20, + 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 
0x67, 0x62, 0x61, 0x29, + 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, + 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, + 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, + 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x5b, 0x33, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, + 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, + 0x5d, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, + 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, + 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x64, + 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x5f, 0x73, 0x65, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x2a, 0x20, 0x70, 0x53, 0x6f, 0x75, + 0x72, 0x63, 0x65, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, + 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, + 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x5f, 0x73, 0x75, 0x62, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2f, 0x2a, 0x3d, 0x20, 0x30, 0x2a, 0x2f, + 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, + 0x6e, 0x64, 0x5f, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x2f, 0x2a, 0x3d, 0x20, 0x32, 0x2a, 0x2f, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, + 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 
0x66, 0x6f, 0x72, 0x20, + 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x75, + 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3d, 0x20, 0x62, 0x65, 0x67, + 0x69, 0x6e, 0x5f, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, + 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3c, 0x20, + 0x65, 0x6e, 0x64, 0x5f, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x3b, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2b, 0x2b, + 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, + 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, + 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x29, 0x0d, 0x0a, 0x09, 0x09, + 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x20, 0x3d, 0x20, + 0x30, 0x3b, 0x20, 0x79, 0x20, 0x3c, 0x20, 0x32, 0x3b, 0x20, 0x79, 0x2b, + 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x78, + 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x78, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 
0x20, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x75, 0x69, + 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x55, 0x49, 0x4e, 0x54, + 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x30, 0x3b, + 0x20, 0x73, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x73, 0x2b, 0x2b, 0x29, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, + 0x20, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, + 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x73, + 0x5d, 0x2c, 0x20, 0x70, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x70, + 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x78, 0x20, 0x2b, 0x20, 0x28, 0x73, + 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, 0x32, 0x20, + 0x2b, 0x20, 0x79, 0x29, 0x20, 0x2a, 0x20, 0x34, 0x5d, 0x2c, 0x20, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x72, 0x72, 0x20, 0x3c, 0x20, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, + 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x73, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 
0x09, 0x7d, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x78, 0x2c, + 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, + 0x32, 0x20, 0x2b, 0x20, 0x79, 0x2c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x29, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, + 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, + 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, + 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, + 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x79, 0x20, 0x3c, 0x20, 0x34, 0x3b, + 0x20, 0x79, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x20, 0x3d, 0x20, 0x30, + 0x3b, 0x20, 0x78, 0x20, 0x3c, 0x20, 0x32, 0x3b, 0x20, 0x78, 0x2b, 0x2b, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x55, + 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x66, 
0x6f, 0x72, 0x20, 0x28, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x20, 0x73, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x73, + 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, + 0x34, 0x5f, 0x74, 0x20, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, + 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, + 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x73, 0x5b, 0x73, 0x5d, 0x2c, 0x20, 0x70, 0x53, 0x6f, 0x75, 0x72, 0x63, + 0x65, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x28, 0x73, 0x75, + 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, + 0x2b, 0x20, 0x78, 0x20, 0x2b, 0x20, 0x79, 0x20, 0x2a, 0x20, 0x34, 0x5d, + 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x72, 0x72, + 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x72, 0x72, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, + 0x3d, 0x20, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, + 0x32, 0x20, 0x2b, 0x20, 0x78, 0x2c, 0x20, 0x79, 
0x2c, 0x20, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x29, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, + 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, + 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x5f, + 0x72, 0x67, 0x62, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x72, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x67, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x62, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, + 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x61, 0x73, 0x20, + 0x3d, 0x20, 0x31, 0x32, 0x37, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, + 0x66, 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, + 0x20, 0x2a, 0x20, 0x31, 0x35, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, + 0x73, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x67, 0x20, 0x2a, 0x20, 0x31, + 0x35, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, 0x2f, + 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, + 0x3d, 0x20, 0x28, 0x62, 0x20, 0x2a, 0x20, 0x31, 0x35, 0x55, 0x20, 0x2b, + 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, 0x2f, 
0x20, 0x32, 0x35, 0x35, + 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, + 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x72, 0x2c, 0x20, 0x31, 0x35, + 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x6d, 0x69, + 0x6e, 0x28, 0x67, 0x2c, 0x20, 0x31, 0x35, 0x55, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x62, 0x2c, 0x20, + 0x31, 0x35, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, + 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, + 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, + 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, + 0x3c, 0x20, 0x38, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x61, 0x73, 0x20, 0x3d, + 0x20, 0x31, 0x32, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, + 0x5f, 0x72, 0x67, 0x62, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, + 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x2c, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x2c, 0x20, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, + 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, + 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x69, 0x6e, 
0x74, 0x20, 0x72, 0x2c, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x67, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, + 0x62, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, + 0x72, 0x74, 0x28, 0x28, 0x72, 0x20, 0x3e, 0x3d, 0x20, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, + 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x72, 0x20, 0x3c, + 0x3d, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, + 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x67, 0x20, + 0x3e, 0x3d, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x26, + 0x26, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3d, 0x20, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, + 0x61, 0x78, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, + 0x72, 0x74, 0x28, 0x28, 0x62, 0x20, 0x3e, 0x3d, 0x20, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, + 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x62, 0x20, 0x3c, + 0x3d, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, + 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x20, 0x30, 0x29, + 0x20, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x69, + 0x66, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x20, 0x30, 0x29, 0x20, 0x67, 0x20, + 0x2b, 0x3d, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, + 0x62, 0x20, 0x3c, 0x20, 0x30, 0x29, 0x20, 0x62, 0x20, 0x2b, 0x3d, 0x20, + 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, + 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, + 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x20, + 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 
0x36, 0x29, 0x29, 0x3b, + 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x34, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x30, 0x5f, + 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x31, 0x5f, + 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, + 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x66, 0x61, + 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x62, 0x61, 0x73, 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, + 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x63, 0x30, 0x5f, + 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, + 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, + 0x61, 0x73, 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x63, 0x31, 0x5f, 0x75, + 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, + 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 
0x0d, 0x0a, 0x0d, 0x0a, + 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x72, 0x67, 0x62, 0x28, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x2c, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x67, 0x2c, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x2c, 0x20, 0x62, + 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, + 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x62, 0x69, 0x61, 0x73, 0x20, 0x3d, 0x20, 0x31, 0x32, 0x37, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x63, + 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, + 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x2a, 0x20, 0x33, 0x31, + 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, 0x2f, 0x20, + 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, + 0x20, 0x28, 0x67, 0x20, 0x2a, 0x20, 0x33, 0x31, 0x55, 0x20, 0x2b, 0x20, + 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, + 0x2a, 0x20, 0x33, 0x31, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, + 0x29, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x6d, 0x69, + 0x6e, 0x28, 0x72, 0x2c, 0x20, 0x33, 0x31, 0x55, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x67, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x67, 0x2c, 0x20, + 0x33, 0x31, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x20, 0x3d, 0x20, + 0x6d, 0x69, 0x6e, 0x28, 0x62, 0x2c, 0x20, 0x33, 0x31, 0x55, 0x29, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, + 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, + 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 
0x20, 0x35, 0x55, 0x29, + 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x31, 0x30, 0x55, + 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, + 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, + 0x67, 0x62, 0x61, 0x20, 0x63, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x72, 0x67, 0x62, 0x28, 0x63, 0x2e, + 0x78, 0x2c, 0x20, 0x63, 0x2e, 0x79, 0x2c, 0x20, 0x63, 0x2e, 0x7a, 0x2c, + 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, + 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, + 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, + 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, + 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 
0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x28, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, + 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, 0x20, 0x3d, + 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, + 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, + 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x63, + 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x79, + 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, + 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, + 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x7a, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, + 0x6b, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x64, 0x72, 0x2c, + 0x20, 0x64, 0x67, 0x2c, 0x20, 0x64, 0x62, 0x29, 0x29, 0x3b, 0x0d, 0x0a, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, + 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 
0x72, 0x67, 0x62, 0x61, + 0x20, 0x63, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, + 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, + 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x63, + 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x30, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x30, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x63, 0x68, 0x65, 0x63, + 0x6b, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x30, 0x5f, 0x75, + 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x31, 0x5f, 0x75, + 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 
0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, + 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, + 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, + 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x35, 0x28, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, + 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, + 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, + 0x69, 0x6e, 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, + 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x79, 0x20, 0x2d, + 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, + 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x7a, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x28, 0x28, 0x64, 0x72, 0x20, 0x3c, + 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, + 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x7c, 0x7c, 0x20, + 0x28, 0x64, 0x72, 0x20, 0x3e, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, + 0x29, 0x29, 0x20, 0x7c, 0x7c, 0x0d, 0x0a, 0x09, 
0x09, 0x28, 0x28, 0x64, + 0x67, 0x20, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, + 0x7c, 0x7c, 0x20, 0x28, 0x64, 0x67, 0x20, 0x3e, 0x20, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, + 0x4d, 0x61, 0x78, 0x29, 0x29, 0x20, 0x7c, 0x7c, 0x0d, 0x0a, 0x09, 0x09, + 0x28, 0x28, 0x64, 0x62, 0x20, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, + 0x6e, 0x29, 0x20, 0x7c, 0x7c, 0x20, 0x28, 0x64, 0x62, 0x20, 0x3e, 0x20, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, + 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x29, 0x0d, 0x0a, 0x09, + 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x33, 0x28, 0x64, 0x72, 0x2c, 0x20, 0x64, 0x67, 0x2c, 0x20, + 0x64, 0x62, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, + 0x74, 0x75, 0x72, 0x6e, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, + 0x5f, 0x72, 0x61, 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, + 0x2a, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x29, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 
0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x77, 0x6f, 0x72, 0x64, 0x33, 0x20, 0x3d, 0x20, 0x30, + 0x2c, 0x20, 0x77, 0x6f, 0x72, 0x64, 0x32, 0x20, 0x3d, 0x20, 0x30, 0x3b, + 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, + 0x79, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x79, 0x2b, 0x2b, 0x29, 0x0d, + 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x20, 0x78, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x78, + 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x20, 0x3d, 0x20, 0x78, 0x20, 0x2a, 0x20, 0x34, 0x20, 0x2b, 0x20, + 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x20, + 0x3d, 0x20, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, + 0x5b, 0x78, 0x20, 0x2b, 0x20, 0x79, 0x20, 0x2a, 0x20, 0x34, 0x5d, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x6c, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x73, 0x20, 0x26, 0x20, 0x31, 0x2c, + 0x20, 0x6d, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x73, 0x20, 0x3e, 0x3e, 0x20, + 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x77, + 0x6f, 0x72, 0x64, 0x33, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x6c, 0x73, 0x62, + 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x77, 0x6f, 0x72, 0x64, + 0x32, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x6d, 0x73, 0x62, 0x20, 0x3c, 0x3c, + 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 
0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, + 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x37, 0x5d, 0x20, 0x3d, 0x20, 0x28, + 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, 0x77, 0x6f, 0x72, + 0x64, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x36, + 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, + 0x29, 0x28, 0x77, 0x6f, 0x72, 0x64, 0x33, 0x20, 0x3e, 0x3e, 0x20, 0x38, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, + 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x35, 0x5d, 0x20, + 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, + 0x77, 0x6f, 0x72, 0x64, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, + 0x73, 0x5b, 0x34, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x38, 0x5f, 0x74, 0x29, 0x28, 0x77, 0x6f, 0x72, 0x64, 0x32, 0x20, 0x3e, + 0x3e, 0x20, 0x38, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x2f, 0x2f, 0x20, 0x2d, 0x2d, 0x2d, 0x2d, 0x20, 0x45, 0x43, 0x31, 0x53, + 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, + 0x69, 0x6e, 0x67, 0x2f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, + 0x20, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, + 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x67, 0x5f, + 0x65, 0x76, 0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x38, 0x5d, 0x5b, 0x32, 0x35, 0x36, 0x5d, + 0x20, 0x3d, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x39, + 0x39, 0x25, 0x20, 0x74, 0x68, 0x72, 0x65, 0x73, 0x68, 0x6f, 0x6c, 0x64, + 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 
0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 
0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, + 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 
0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 
0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 
0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, + 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 
0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 
0x30, 0x2c, 0x31, 0x2c, + 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, + 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 
0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, + 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 
0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 
0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 
0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 
0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 
0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x7d, 0x0d, + 0x0a, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, + 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x65, 0x74, + 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, + 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x63, + 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x65, 0x73, 0x5f, 0x74, + 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, 
0x5f, 0x75, 0x6e, 0x73, + 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, + 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, + 0x69, 0x6e, 0x61, 0x74, 0x65, 0x73, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x67, 0x65, + 0x74, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, + 0x61, 0x20, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, + 0x69, 0x6e, 0x74, 0x20, 0x62, 0x72, 0x2c, 0x20, 0x62, 0x67, 0x2c, 0x20, + 0x62, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x62, 0x72, 0x20, + 0x3d, 0x20, 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x3e, 0x3e, 0x20, 0x32, + 0x29, 0x20, 0x7c, 0x20, 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, + 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x3c, 0x3c, + 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x67, 0x20, 0x3d, 0x20, + 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x20, + 0x7c, 0x20, 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x3c, 0x3c, 0x20, 0x33, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x75, + 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2e, 0x7a, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x20, 0x7c, 0x20, + 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 
0x64, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, + 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x29, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x62, + 0x72, 0x2c, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, + 0x62, 0x67, 0x2c, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, + 0x29, 0x62, 0x62, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, + 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, + 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, + 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, + 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, + 0x61, 0x74, 0x65, 0x73, 0x20, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, + 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x38, 0x5f, 0x74, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x6d, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x36, + 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x3b, 0x0d, + 0x0a, 0x7d, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, + 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, + 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 
0x70, 0x65, 0x64, 0x65, + 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x65, 0x74, 0x63, + 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, + 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x62, 0x72, + 0x2c, 0x20, 0x6d, 0x5f, 0x62, 0x67, 0x2c, 0x20, 0x6d, 0x5f, 0x62, 0x62, + 0x3b, 0x0d, 0x0a, 0x09, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x20, 0x6d, + 0x5f, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, + 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x6d, 0x61, 0x78, 0x5f, + 0x63, 0x6f, 0x6d, 0x70, 0x5f, 0x73, 0x70, 0x72, 0x65, 0x61, 0x64, 0x3b, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, + 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, + 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x20, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x74, + 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, + 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, + 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x65, 0x76, 0x61, + 0x6c, 0x75, 0x61, 0x74, 0x65, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x28, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, + 0x61, 0x74, 0x65, 0x20, 0x2a, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, + 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, + 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x75, 0x69, 
0x6e, 0x74, 0x36, 0x34, + 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, + 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, + 0x62, 0x61, 0x20, 0x2a, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, + 0x20, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x2a, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, + 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, + 0x74, 0x65, 0x73, 0x20, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2c, 0x20, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, + 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, + 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x2a, 0x20, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, + 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, 0x0d, 0x0a, 0x09, 0x65, + 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, + 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, + 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x20, 0x70, + 0x42, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, + 0x38, 0x5f, 0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x5f, 0x73, 0x65, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x36, 0x5d, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, + 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x76, + 0x61, 0x6c, 0x69, 0x64, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 
0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x20, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, + 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6f, 0x72, 0x64, + 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, + 0x0a, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, + 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x66, 0x6f, + 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, + 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, + 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x3b, 0x20, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2b, 0x2b, 0x29, 0x0d, + 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x2f, 0x2f, 0x20, 0x54, 0x4f, + 0x44, 0x4f, 0x3a, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x63, 0x68, 0x65, + 0x63, 0x6b, 0x20, 0x69, 0x73, 0x20, 0x65, 0x71, 0x75, 0x69, 0x76, 0x61, + 0x6c, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x6f, 0x20, 0x6d, 0x65, 0x64, 0x69, + 0x75, 0x6d, 0x20, 0x71, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x20, 0x69, + 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x43, 0x2b, 0x2b, 0x20, 0x76, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, + 0x20, 0x28, 0x21, 0x67, 0x5f, 0x65, 0x76, 0x61, 0x6c, 0x5f, 0x64, 0x69, + 0x73, 0x74, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x69, 0x6e, + 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 
0x65, 0x5d, 0x5b, 0x70, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x6d, 0x61, 0x78, + 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x5f, 0x73, 0x70, 0x72, 0x65, 0x61, 0x64, + 0x5d, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x69, + 0x6e, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x2a, 0x20, + 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, + 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5d, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x73, 0x20, + 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x73, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x79, + 0x64, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x73, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x73, 0x5b, 0x73, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, + 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x79, 0x64, 0x29, 0x2c, + 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x61, + 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2b, + 0x20, 0x79, 0x64, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, + 0x35, 0x35, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 
0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x79, 0x64, 0x29, 0x2c, 0x20, 0x32, + 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, + 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, + 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x36, + 0x34, 0x5f, 0x74, 0x20, 0x63, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x63, + 0x20, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, + 0x73, 0x3b, 0x20, 0x63, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, + 0x67, 0x62, 0x61, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, + 0x6c, 0x20, 0x3d, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, + 0x63, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x20, 0x3d, 0x20, 0x33, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, + 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, + 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, + 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, + 0x30, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 
0x6f, 0x72, 0x5f, 0x64, + 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x50, 0x61, 0x72, + 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, + 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, + 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, + 0x66, 0x20, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, + 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x32, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, + 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, + 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, + 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, + 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, + 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x73, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, + 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, + 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 
0x72, 0x6f, 0x72, 0x20, + 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, + 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, + 0x65, 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, + 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, + 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, + 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x72, 0x69, + 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, + 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, + 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x20, 0x3d, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x6e, 0x75, + 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3c, 0x3d, 0x20, + 0x31, 0x36, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x74, 0x65, 0x6d, + 0x70, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, + 0x63, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, + 0x74, 0x29, 0x28, 0x62, 0x65, 0x73, 0x74, 0x5f, 
0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x57, + 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x3f, 0x20, 0x28, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2a, 0x20, 0x28, + 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x29, 0x70, 0x57, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x73, 0x5b, 0x63, 0x5d, 0x29, 0x20, 0x3a, 0x20, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x20, 0x3e, 0x3d, 0x20, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, + 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, + 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, + 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x70, 0x54, + 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, + 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x70, 0x54, 0x72, + 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, + 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 
0x6c, 0x65, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x6e, 0x75, 0x6d, 0x5f, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3c, 0x3d, 0x20, 0x31, 0x36, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, + 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, + 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x69, 0x5d, 0x20, 0x3d, 0x20, 0x74, + 0x65, 0x6d, 0x70, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x73, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, + 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x76, + 0x61, 0x6c, 0x69, 0x64, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, + 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, + 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6f, + 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, + 0x73, 0x73, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0d, + 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x70, 0x42, 0x65, 0x73, 0x74, 0x5f, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x29, 0x0d, 0x0a, 0x09, + 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 
0x28, 0x70, 0x54, 0x72, + 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, + 0x70, 0x42, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, + 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x2a, 0x70, + 0x42, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x20, 0x3d, 0x20, 0x2a, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x20, 0x3d, 0x20, + 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, + 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x72, + 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, + 0x73, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, + 0x64, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x69, 0x74, 0x28, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x20, + 0x2a, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x0d, 0x0a, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, + 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, + 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, + 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x0d, + 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6e, + 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, + 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 
0x20, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, + 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, + 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x3d, 0x20, 0x33, 0x31, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x32, 0x35, 0x35, 0x3b, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, + 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x30, + 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, + 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, + 0x74, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, + 0x3d, 0x20, 0x30, 0x2c, 0x20, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x3d, + 0x20, 0x30, 0x2c, 0x20, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x3d, 0x20, + 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x66, + 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, + 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, + 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x20, + 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x20, 0x3d, 0x20, 0x70, 0x50, 0x69, + 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x09, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, + 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x63, 0x29, 0x3b, 
0x0d, 0x0a, 0x09, 0x09, + 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, + 0x6d, 0x61, 0x78, 0x28, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2c, 0x20, 0x63, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, + 0x69, 0x66, 0x20, 0x28, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x77, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x20, 0x3d, 0x20, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, + 0x73, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x77, 0x65, 0x69, + 0x67, 0x68, 0x74, 0x20, 0x2a, 0x20, 0x63, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x2b, 0x3d, 0x20, + 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x2a, 0x20, 0x63, 0x2e, 0x79, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, + 0x2b, 0x3d, 0x20, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x2a, 0x20, + 0x63, 0x2e, 0x7a, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, + 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x6c, 0x73, + 0x65, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, + 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x2e, 0x78, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x2b, + 0x3d, 0x20, 0x63, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, + 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x2e, 0x7a, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x2b, 0x2b, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x09, 0x0d, 0x0a, 0x09, 0x66, 0x6c, 0x6f, 0x61, 
0x74, 0x33, 0x20, 0x61, + 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, + 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, + 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x73, 0x75, 0x6d, + 0x5f, 0x72, 0x20, 0x2f, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, + 0x65, 0x69, 0x67, 0x68, 0x74, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x76, 0x67, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x3d, 0x20, 0x28, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, + 0x2f, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, + 0x61, 0x74, 0x29, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x2f, 0x20, 0x74, + 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, + 0x3e, 0x6d, 0x5f, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x20, 0x3d, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, + 0x6d, 0x5f, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x5f, 0x73, + 0x70, 0x72, 0x65, 0x61, 0x64, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x28, + 0x6d, 0x61, 0x78, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x61, 0x78, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x28, + 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2e, 0x78, 0x2c, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x61, + 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2d, 0x20, + 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x2e, 0x79, 0x29, 0x2c, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, + 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, + 0x2d, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 
0x69, 0x6e, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x54, 0x4f, 0x44, 0x4f, 0x3a, 0x20, + 0x54, 0x68, 0x65, 0x20, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x69, 0x6e, 0x67, + 0x20, 0x68, 0x65, 0x72, 0x65, 0x20, 0x63, 0x6f, 0x75, 0x6c, 0x64, 0x20, + 0x62, 0x65, 0x20, 0x69, 0x6d, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x64, 0x2c, + 0x20, 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x44, + 0x58, 0x54, 0x31, 0x2f, 0x42, 0x43, 0x31, 0x2e, 0x0d, 0x0a, 0x09, 0x70, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x72, 0x20, + 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, + 0x29, 0x28, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, + 0x78, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, + 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, + 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, + 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, + 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x61, 0x76, 0x67, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2a, 0x20, 0x28, + 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, + 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, + 0x62, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, + 0x6e, 0x74, 0x29, 0x28, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2e, 0x7a, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, + 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, + 0x4d, 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 
0x0a, 0x09, 0x70, 0x53, + 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, + 0x76, 0x61, 0x6c, 0x69, 0x64, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, + 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x20, 0x3d, 0x20, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, + 0x58, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, + 0x64, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, + 0x61, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, + 0x69, 0x74, 0x28, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x65, 0x72, + 0x6d, 0x73, 0x5f, 0x74, 0x6f, 0x5f, 0x74, 0x72, 0x79, 0x2c, 0x0d, 0x0a, + 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, + 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x20, 0x2a, + 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, + 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, + 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x0d, 0x0a, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6e, 0x75, + 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, + 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 
0x61, 0x6c, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x57, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x4c, 0x49, + 0x4d, 0x49, 0x54, 0x20, 0x3d, 0x20, 0x33, 0x31, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, + 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, + 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, + 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x65, 0x73, 0x20, 0x63, 0x75, + 0x72, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x3b, 0x0d, 0x0a, 0x09, + 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, + 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x72, 0x2c, 0x20, 0x70, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x67, 0x2c, 0x20, 0x70, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x62, 0x2c, + 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, + 0x5f, 0x65, 0x76, 0x61, 0x6c, 0x75, 0x61, 0x74, 0x65, 0x5f, 0x73, 0x6f, + 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, + 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, + 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 
0x20, 0x70, 0x57, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x5f, 0x63, + 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2c, 0x20, 0x26, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, + 0x26, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x69, 0x66, + 0x20, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x2e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x3d, + 0x20, 0x30, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, + 0x6e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x74, 0x6f, 0x74, 0x61, + 0x6c, 0x5f, 0x70, 0x65, 0x72, 0x6d, 0x73, 0x5f, 0x74, 0x6f, 0x5f, 0x74, + 0x72, 0x79, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, + 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, + 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x2c, + 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x67, + 0x20, 0x3d, 0x20, 0x30, 0x2c, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, + 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, + 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, + 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, + 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 
0x6f, 0x6f, 0x72, 0x64, + 0x73, 0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, + 0x61, 0x20, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, + 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, + 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 0x6f, + 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, + 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, + 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x2a, 0x70, + 0x4e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x73, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x66, 0x69, 0x74, 0x5f, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x5f, + 0x74, 0x61, 0x62, 0x5b, 0x69, 0x5d, 0x2e, 0x6d, 0x5f, 0x76, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x71, 0x20, 0x3d, 0x20, 0x30, + 0x3b, 0x20, 0x71, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x71, 0x2b, 0x2b, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x79, 0x64, 0x5f, + 0x74, 0x65, 0x6d, 0x70, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, + 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x71, 0x5d, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, + 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x4e, 0x75, + 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, + 0x71, 0x5d, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6c, 
0x61, 0x6d, 0x70, 0x28, + 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, + 0x20, 0x2b, 0x20, 0x79, 0x64, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x20, 0x2d, 0x20, 0x62, 0x61, + 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, + 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x4e, 0x75, 0x6d, + 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x71, + 0x5d, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x62, + 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, + 0x2b, 0x20, 0x79, 0x64, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x20, 0x2d, 0x20, 0x62, 0x61, 0x73, + 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, + 0x6d, 0x5f, 0x62, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x4e, 0x75, 0x6d, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x71, 0x5d, + 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x62, 0x61, + 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x2b, + 0x20, 0x79, 0x64, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x32, 0x35, 0x35, 0x29, 0x20, 0x2d, 0x20, 0x62, 0x61, 0x73, 0x65, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x28, 0x21, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, + 0x5f, 0x72, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x21, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x29, 0x20, 0x26, 0x26, + 0x20, 0x28, 0x21, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, + 0x5f, 0x62, 0x29, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, + 0x74, 0x69, 0x6e, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 
0x0d, 0x0a, 0x09, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, + 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x5f, + 0x66, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x28, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x29, + 0x20, 0x2f, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x76, 0x67, + 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x67, 0x5f, 0x66, 0x20, 0x3d, + 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x29, 0x20, 0x2f, 0x20, + 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, + 0x6c, 0x74, 0x61, 0x5f, 0x62, 0x5f, 0x66, 0x20, 0x3d, 0x20, 0x28, 0x66, + 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, + 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x29, 0x20, 0x2f, 0x20, 0x38, 0x3b, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, + 0x6e, 0x74, 0x20, 0x62, 0x72, 0x31, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, + 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x28, 0x70, 0x53, + 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x61, 0x76, 0x67, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x61, 0x76, + 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x5f, 0x66, 0x29, + 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, 0x20, + 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, 0x35, + 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x20, 0x62, 0x67, 0x31, 0x20, 0x3d, 0x20, 0x63, 0x6c, + 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x28, 0x70, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 
0x5f, 0x61, 0x76, 0x67, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x61, + 0x76, 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x67, 0x5f, 0x66, + 0x29, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, + 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, + 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, + 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x62, 0x31, 0x20, 0x3d, 0x20, 0x63, + 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x28, + 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x61, 0x76, + 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x2d, 0x20, + 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x62, 0x5f, + 0x66, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, + 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, + 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, + 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, + 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, + 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x62, 0x72, 0x31, 0x2c, 0x20, + 0x62, 0x67, 0x31, 0x2c, 0x20, 0x62, 0x62, 0x31, 0x2c, 0x20, 0x32, 0x35, + 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, + 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, + 0x5f, 0x65, 0x76, 0x61, 0x6c, 0x75, 0x61, 0x74, 0x65, 0x5f, 0x73, 0x6f, + 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, + 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, + 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 
0x20, 0x70, 0x57, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x5f, 0x63, + 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2c, 0x20, 0x26, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, + 0x26, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x2e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x3d, 0x20, + 0x30, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, + 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x2f, 0x2f, 0x20, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x20, 0x61, 0x6e, + 0x20, 0x45, 0x54, 0x43, 0x31, 0x53, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x67, 0x69, 0x76, 0x65, 0x6e, 0x20, 0x61, 0x20, 0x34, 0x78, 0x34, + 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x2e, 0x0d, 0x0a, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, + 0x69, 0x64, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, + 0x63, 0x31, 0x73, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x28, 0x0d, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, + 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, + 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, + 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x73, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 
0x20, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3d, 0x20, 0x26, 0x70, + 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, + 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, + 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x3b, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x69, 0x74, 0x28, 0x26, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x70, 0x49, 0x6e, 0x70, + 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x4e, 0x55, 0x4c, 0x4c, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, + 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x66, 0x69, 0x74, 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, + 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 
0x61, 0x6c, 0x5f, 0x70, + 0x65, 0x72, 0x6d, 0x73, 0x2c, 0x20, 0x26, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x31, + 0x36, 0x2c, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, + 0x73, 0x2c, 0x20, 0x4e, 0x55, 0x4c, 0x4c, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x66, 0x6c, + 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, + 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, + 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, + 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, + 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, + 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5f, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, + 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, + 0x72, 0x61, 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, 
0x63, 0x74, 0x6f, 0x72, + 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x70, 0x4f, 0x75, 0x74, 0x70, + 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, + 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, + 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, + 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, + 0x64, 0x29, 0x29, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, + 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, 0x78, 0x65, + 0x6c, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, + 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x70, + 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, + 0x0a, 0x7d, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, + 0x73, 0x74, 0x65, 0x72, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, + 0x44, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x20, 0x74, 0x68, + 0x65, 0x20, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x61, 0x6c, 0x20, 0x45, 0x54, + 0x43, 0x31, 0x53, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x2f, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x67, 0x69, 0x76, + 0x65, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x61, 0x72, 0x62, 0x69, 0x74, 0x72, + 0x61, 0x72, 0x79, 0x20, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x20, 0x61, 0x72, + 0x72, 0x61, 0x79, 0x20, 0x6f, 0x66, 0x20, 0x34, 
0x78, 0x34, 0x20, 0x69, + 0x6e, 0x70, 0x75, 0x74, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x20, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2e, 0x0d, 0x0a, 0x6b, 0x65, 0x72, 0x6e, + 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x6e, 0x63, 0x6f, + 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x66, 0x72, 0x6f, + 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x28, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, + 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, + 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x0d, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, + 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x20, 0x2a, 0x70, 0x49, 0x6e, + 0x70, 0x75, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, + 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, + 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x77, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x63, 0x6c, 
0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, + 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, + 0x30, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, + 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x20, + 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, + 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, + 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3d, + 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x2d, 0x3e, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, + 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, + 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, + 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, + 0x73, 0x74, 0x65, 0x72, 0x2d, 0x3e, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, + 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, + 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x77, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x2b, 0x20, 
0x70, 0x49, 0x6e, 0x70, + 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x2d, 0x3e, + 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, + 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, + 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x20, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, + 0x69, 0x6e, 0x69, 0x74, 0x28, 0x26, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2c, + 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, + 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x57, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, + 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x63, + 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, 0x69, 0x74, 0x28, 0x70, + 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x65, 0x72, 0x6d, 0x73, 0x2c, 0x20, 0x26, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x2c, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, + 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, + 0x73, 0x2c, 0x20, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x26, + 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 
0x6f, 0x63, 0x6b, 0x5f, + 0x73, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, + 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2e, 0x6d, + 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, + 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5f, + 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, + 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x70, + 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, + 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x2d, 0x2d, 0x2d, + 0x2d, 0x20, 0x72, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x5f, 0x65, 0x6e, 0x64, + 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x0d, 0x0a, 0x74, 0x79, + 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, + 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, + 0x29, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 
0x61, 0x67, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, + 0x20, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, + 0x73, 0x74, 0x65, 0x72, 0x5f, 0x6f, 0x66, 0x73, 0x3b, 0x0d, 0x0a, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x6e, + 0x75, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x3b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, + 0x6d, 0x5f, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x63, 0x75, 0x72, + 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x65, 0x74, 0x63, + 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x72, + 0x65, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, + 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, + 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, + 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, + 0x72, 0x65, 0x63, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, + 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, + 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, + 0x38, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, + 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x31, + 0x36, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, + 0x72, 0x65, 0x63, 0x5f, 0x65, 0x6e, 0x64, 0x70, 
0x6f, 0x69, 0x6e, 0x74, + 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, + 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, + 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, + 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, + 0x72, 0x65, 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, + 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x70, + 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, + 0x7d, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, + 0x2f, 0x20, 0x46, 0x6f, 0x72, 0x20, 0x65, 0x61, 0x63, 0x68, 0x20, 0x69, + 0x6e, 0x70, 0x75, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3a, 0x20, + 0x66, 0x69, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x62, 0x65, 0x73, + 0x74, 0x20, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x20, 0x63, + 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, + 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x73, 0x20, 0x69, 0x74, 0x2e, 0x0d, + 0x0a, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, + 0x20, 0x72, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x5f, 0x65, 0x6e, 0x64, 0x70, + 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, + 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x0d, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x63, 0x5f, + 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 
0x20, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x72, + 0x65, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x2c, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, + 0x61, 0x6c, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, + 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, + 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x2a, 0x70, 0x53, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x2c, + 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, + 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, + 0x65, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x73, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, + 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 
0x3d, 0x20, 0x70, 0x53, + 0x6f, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x5b, 0x73, 0x6f, 0x72, 0x74, + 0x65, 0x64, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, + 0x75, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, + 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, + 0x6c, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, + 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, + 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3d, 0x20, + 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x70, 0x72, 0x69, 0x76, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x72, 0x69, 0x76, + 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x20, 0x3d, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, + 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, + 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, + 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 
0x6f, 0x66, 0x73, 0x3b, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, + 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, + 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x63, 0x75, 0x72, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, + 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x63, 0x75, 0x72, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, + 0x74, 0x65, 0x6e, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5b, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, + 0x2e, 0x6d, 0x5f, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, + 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, + 0x34, 0x5f, 0x74, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, 0x6c, 0x5f, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x55, + 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 
0x58, 0x3b, 0x0d, 0x0a, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, + 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, + 0x20, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x73, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, + 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, + 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x2b, 0x20, 0x69, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x20, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, + 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x5b, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, + 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x20, 0x3d, 0x20, + 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x65, 0x74, 0x63, + 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, + 0x74, 0x20, 0x6f, 0x72, 0x69, 0x67, 0x5f, 0x63, 
0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x70, + 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, + 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x74, 0x63, 0x5f, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x20, 0x3e, 0x20, 0x63, 0x75, 0x72, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x29, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x69, 0x6e, 0x75, + 0x65, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x35, 0x28, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x26, + 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, + 0x6e, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, + 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, + 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x63, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, + 0x63, 0x20, 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, 0x63, 0x2b, 0x2b, 0x29, + 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 
0x20, 0x73, 0x72, 0x63, + 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x70, 0x72, 0x69, + 0x76, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x2e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x63, + 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, + 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, + 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, + 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, + 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, + 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, + 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x66, 0x61, + 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, + 0x20, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, + 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 
0x65, 0x28, 0x70, 0x65, + 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, + 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x32, 0x5d, + 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, + 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, + 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, + 0x65, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, + 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, + 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x73, 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x72, + 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, + 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, + 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 
0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, 0x6c, + 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x20, 0x7c, + 0x7c, 0x0d, 0x0a, 0x09, 0x09, 0x20, 0x20, 0x20, 0x20, 0x20, 0x28, 0x28, + 0x6f, 0x72, 0x69, 0x67, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, + 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x3d, 0x20, 0x63, 0x75, + 0x72, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x20, 0x26, + 0x26, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x3d, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, + 0x6c, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x29, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, 0x6c, 0x5f, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x74, + 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, + 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, + 0x20, 0x6f, 0x72, 0x69, 0x67, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x69, 0x66, 0x20, 0x28, 0x21, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, + 0x6c, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x69, 0x6e, 0x64, + 0x69, 0x63, 0x65, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, + 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 
0x0d, 0x0a, 0x2f, 0x2f, + 0x20, 0x2d, 0x2d, 0x2d, 0x2d, 0x20, 0x66, 0x69, 0x6e, 0x64, 0x5f, 0x6f, + 0x70, 0x74, 0x69, 0x6d, 0x61, 0x6c, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, + 0x5f, 0x66, 0x6f, 0x72, 0x5f, 0x65, 0x61, 0x63, 0x68, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, + 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, + 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, + 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x66, + 0x6f, 0x73, 0x63, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, + 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x73, + 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x09, 0x2f, 0x2f, + 0x20, 0x34, 0x78, 0x34, 0x20, 0x67, 0x72, 0x69, 0x64, 0x20, 0x6f, 0x66, + 0x20, 0x32, 0x2d, 0x62, 0x69, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x73, 0x0d, 0x0a, 0x7d, 0x20, 0x66, 0x6f, 0x73, 0x63, + 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, + 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, + 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, + 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, + 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, + 0x62, 0x61, 0x20, 0x6d, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x20, 0x20, + 0x2f, 0x2f, 0x20, 0x75, 0x6e, 0x73, 0x63, 0x61, 
0x6c, 0x65, 0x64, 0x20, + 0x35, 0x2d, 0x62, 0x69, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x52, 0x47, 0x42, + 0x2c, 0x20, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x20, 0x68, 0x61, 0x73, 0x20, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x27, 0x73, 0x20, 0x69, 0x6e, 0x74, 0x65, + 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x0d, + 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, + 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x3b, 0x09, 0x09, 0x2f, 0x2f, 0x20, 0x6f, 0x66, 0x66, + 0x73, 0x65, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x73, 0x65, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x0d, + 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, + 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x73, 0x3b, 0x09, 0x09, 0x2f, 0x2f, 0x20, 0x6e, 0x75, 0x6d, 0x62, + 0x65, 0x72, 0x20, 0x6f, 0x66, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x63, 0x68, 0x65, 0x63, 0x6b, + 0x0d, 0x0a, 0x7d, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, + 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x70, + 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, + 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, + 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x3b, 0x0d, 0x0a, 0x09, + 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, + 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, 0x7d, 
0x20, 0x66, 0x6f, 0x73, + 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, + 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x46, 0x6f, + 0x72, 0x20, 0x65, 0x61, 0x63, 0x68, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, + 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3a, 0x20, 0x46, 0x69, 0x6e, 0x64, + 0x20, 0x74, 0x68, 0x65, 0x20, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, + 0x65, 0x64, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, + 0x77, 0x68, 0x69, 0x63, 0x68, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x73, 0x20, 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x6f, 0x77, + 0x65, 0x73, 0x74, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x0d, 0x0a, + 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, + 0x66, 0x69, 0x6e, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x61, 0x6c, + 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x5f, 0x66, 0x6f, 0x72, 0x5f, 0x65, + 0x61, 0x63, 0x68, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x28, 0x0d, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6f, + 0x73, 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, + 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, + 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2c, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, + 0x6c, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, + 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, + 0x66, 0x6f, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 
0x6f, 0x73, 0x63, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x2c, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, + 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, + 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, + 0x73, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x6f, 0x62, + 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x29, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, + 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3d, + 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, + 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, + 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 
0x6e, 0x66, 0x6f, 0x20, + 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, + 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x73, 0x65, + 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, + 0x74, 0x20, 0x2a, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x73, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x70, 0x42, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x2d, 0x3e, 0x6d, + 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, + 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, + 0x20, 0x3d, 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, + 0x66, 0x6f, 0x2d, 0x3e, 0x6d, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x65, + 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, + 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x20, 0x3d, 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x74, + 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 
0x72, 0x73, 0x35, 0x28, + 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x26, 0x65, 0x74, 0x63, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, + 0x6e, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2e, 0x77, 0x2c, 0x20, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x5b, + 0x31, 0x36, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, + 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x29, 0x0d, 0x0a, 0x09, + 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x20, 0x73, 0x65, 0x6c, 0x20, 0x3c, 0x20, 0x34, 0x3b, + 0x20, 0x2b, 0x2b, 0x73, 0x65, 0x6c, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, + 0x20, 0x31, 0x36, 0x3b, 0x20, 0x2b, 0x2b, 0x69, 0x29, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, 0x6c, 0x5d, 0x5b, 0x69, 0x5d, 0x20, + 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, + 0x61, 0x6e, 0x63, 0x65, 0x28, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x20, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, + 0x5b, 0x69, 0x5d, 0x2c, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, + 0x73, 0x65, 0x6c, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 
0x65, 0x6c, 0x73, 0x65, + 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, + 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, + 0x6c, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x73, 0x65, 0x6c, 0x20, 0x3c, + 0x20, 0x34, 0x3b, 0x20, 0x2b, 0x2b, 0x73, 0x65, 0x6c, 0x29, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, + 0x69, 0x20, 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, 0x2b, 0x2b, 0x69, 0x29, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, 0x6c, 0x5d, 0x5b, + 0x69, 0x5d, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, + 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x2c, 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x69, + 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x69, 0x5d, 0x2c, 0x20, 0x74, 0x72, 0x69, + 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, 0x6c, 0x5d, 0x2c, 0x20, 0x66, 0x61, + 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x55, 0x49, + 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x5f, 0x69, + 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x73, 0x65, + 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3c, 0x20, 0x6e, 0x75, + 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, + 0x20, 0x73, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 
0x65, 0x78, 0x2b, 0x2b, + 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x73, 0x20, 0x3d, + 0x20, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, + 0x73, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, + 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, + 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, + 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x30, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, + 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, 0x69, 0x2b, + 0x2b, 0x2c, 0x20, 0x73, 0x65, 0x6c, 0x73, 0x20, 0x3e, 0x3e, 0x3d, 0x20, + 0x32, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x65, 0x72, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, 0x6c, + 0x73, 0x20, 0x26, 0x20, 0x33, 0x5d, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, + 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x65, 0x72, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, + 0x3d, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x73, 0x65, 0x6c, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, + 0x66, 0x20, 0x28, 0x21, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 
0x09, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x73, + 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x5b, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, + 0x20, 0x3d, 0x20, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, + 0x69, 0x63, 0x65, 0x73, 0x5b, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x6e, 0x66, 0x6f, 0x2d, 0x3e, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, + 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x2b, + 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, + 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x64, + 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x5f, 0x73, 0x65, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, + 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, + 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, + 0x29, 0x20, 0x64, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, + 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, + 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, + 0x0a, 0x7d, 0x20, 0x64, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, + 0x2f, 0x20, 0x46, 0x6f, 0x72, 0x20, 0x65, 0x61, 0x63, 0x68, 0x20, 0x69, + 0x6e, 0x70, 0x75, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3a, 0x20, + 0x44, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 
0x65, 0x20, 0x74, 0x68, + 0x65, 0x20, 0x45, 0x54, 0x43, 0x31, 0x53, 0x20, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x73, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, + 0x20, 0x6c, 0x6f, 0x77, 0x65, 0x73, 0x74, 0x20, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x2c, 0x20, 0x67, 0x69, 0x76, 0x65, 0x6e, 0x20, 0x65, 0x61, 0x63, + 0x68, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x27, 0x73, 0x20, 0x70, 0x72, + 0x65, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x20, + 0x45, 0x54, 0x43, 0x31, 0x53, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, + 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x69, 0x65, 0x73, + 0x2e, 0x20, 0x0d, 0x0a, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, + 0x6f, 0x69, 0x64, 0x20, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, + 0x65, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x28, + 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x64, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, + 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, + 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2c, 0x0d, 0x0a, + 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, + 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x65, 0x74, 0x63, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x61, 0x6e, 0x64, 0x5f, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 
0x29, 0x0d, 0x0a, 0x7b, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3d, 0x20, + 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, + 0x67, 0x62, 0x61, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x20, 0x3d, 0x20, 0x70, + 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x61, 0x6e, 0x64, 0x5f, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, + 0x0d, 0x0a, 0x09, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x35, 0x28, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x26, + 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2e, 0x77, + 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 
0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, + 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, + 0x69, 0x74, 0x28, 0x26, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, + 0x26, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x73, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x6f, 0x75, + 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x2e, 0x77, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, + 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x69, 0x5d, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x65, + 0x72, 0x72, 0x30, 0x20, 0x3d, 0x20, 0x63, 0x6f, 
0x6c, 0x6f, 0x72, 0x5f, + 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x61, 0x72, + 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, + 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, 0x2c, 0x20, + 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x20, 0x65, 0x72, 0x72, 0x31, 0x20, 0x3d, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, + 0x65, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, + 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x70, + 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, + 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x65, 0x72, 0x72, + 0x32, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, + 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, + 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, + 0x61, 0x6c, 0x2c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x66, 0x61, + 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x20, 0x65, 0x72, 0x72, 0x33, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, + 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, + 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x70, 0x69, 0x78, + 0x65, 0x6c, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 
0x72, 0x73, 0x5b, 0x33, + 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, + 0x6d, 0x69, 0x6e, 0x28, 0x6d, 0x69, 0x6e, 0x28, 0x65, 0x72, 0x72, 0x30, + 0x2c, 0x20, 0x65, 0x72, 0x72, 0x31, 0x29, 0x2c, 0x20, 0x65, 0x72, 0x72, + 0x32, 0x29, 0x2c, 0x20, 0x65, 0x72, 0x72, 0x33, 0x29, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, + 0x28, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x3d, + 0x20, 0x65, 0x72, 0x72, 0x32, 0x29, 0x20, 0x3f, 0x20, 0x32, 0x20, 0x3a, + 0x20, 0x33, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x65, 0x72, 0x72, 0x20, 0x3d, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x31, 0x29, + 0x20, 0x3f, 0x20, 0x31, 0x20, 0x3a, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x30, + 0x29, 0x20, 0x3f, 0x20, 0x30, 0x20, 0x3a, 0x20, 0x62, 0x65, 0x73, 0x74, + 0x5f, 0x73, 0x65, 0x6c, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x26, 0x6f, + 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x69, 0x20, 0x26, 0x20, 0x33, 0x2c, 0x20, 0x69, 0x20, 0x3e, 0x3e, + 0x20, 0x32, 0x2c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, + 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 
0x69, 0x6e, 0x64, 0x65, + 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a +}; +unsigned int ocl_kernels_cl_len = 45361; diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_opencl.h b/Source/ThirdParty/basis_universal/encoder/basisu_opencl.h new file mode 100644 index 000000000..d849d79d0 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_opencl.h @@ -0,0 +1,143 @@ +// basisu_opencl.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Note: Undefine or set BASISU_SUPPORT_OPENCL to 0 to completely OpenCL support. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "../transcoder/basisu.h" +#include "basisu_enc.h" +#include "basisu_etc.h" + +namespace basisu +{ + bool opencl_init(bool force_serialization); + void opencl_deinit(); + bool opencl_is_available(); + + struct opencl_context; + + // Each thread calling OpenCL should have its own opencl_context_ptr. This corresponds to a OpenCL command queue. (Confusingly, we only use a single OpenCL device "context".) 
+ typedef opencl_context* opencl_context_ptr; + + opencl_context_ptr opencl_create_context(); + void opencl_destroy_context(opencl_context_ptr context); + +#pragma pack(push, 1) + struct cl_pixel_block + { + color_rgba m_pixels[16]; // [y*4+x] + }; +#pragma pack(pop) + + // Must match BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE + const uint32_t OPENCL_ENCODE_ETC1S_MAX_PERMS = 165; + + bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks); + + bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms); + + // opencl_encode_etc1s_pixel_clusters + +#pragma pack(push, 1) + struct cl_pixel_cluster + { + uint64_t m_total_pixels; + uint64_t m_first_pixel_index; + }; +#pragma pack(pop) + + bool opencl_encode_etc1s_pixel_clusters( + opencl_context_ptr pContext, + etc_block* pOutput_blocks, + uint32_t total_clusters, + const cl_pixel_cluster *pClusters, + uint64_t total_pixels, + const color_rgba *pPixels, + const uint32_t *pPixel_weights, + bool perceptual, uint32_t total_perms); + + // opencl_refine_endpoint_clusterization + +#pragma pack(push, 1) + struct cl_block_info_struct + { + uint16_t m_first_cluster_ofs; + uint16_t m_num_clusters; + uint16_t m_cur_cluster_index; + uint8_t m_cur_cluster_etc_inten; + }; + + struct cl_endpoint_cluster_struct + { + color_rgba m_unscaled_color; + uint8_t m_etc_inten; + uint16_t m_cluster_index; + }; +#pragma pack(pop) + + bool opencl_refine_endpoint_clusterization( + opencl_context_ptr pContext, + const cl_block_info_struct *pPixel_block_info, + uint32_t total_clusters, + const cl_endpoint_cluster_struct *pCluster_info, + const uint32_t *pSorted_block_indices, + uint32_t* pOutput_cluster_indices, + bool perceptual); + + // opencl_find_optimal_selector_clusters_for_each_block + +#pragma pack(push, 1) + struct fosc_selector_struct + { + uint32_t m_packed_selectors; // 4x4 grid of 2-bit selectors + }; + + struct 
fosc_block_struct + { + color_rgba m_etc_color5_inten; // unscaled 5-bit block color in RGB, alpha has block's intensity index + uint32_t m_first_selector; // offset into selector table + uint32_t m_num_selectors; // number of selectors to check + }; + + struct fosc_param_struct + { + uint32_t m_total_blocks; + int m_perceptual; + }; +#pragma pack(pop) + + bool opencl_find_optimal_selector_clusters_for_each_block( + opencl_context_ptr pContext, + const fosc_block_struct* pInput_block_info, // one per block + uint32_t total_input_selectors, + const fosc_selector_struct* pInput_selectors, + const uint32_t* pSelector_cluster_indices, + uint32_t* pOutput_selector_cluster_indices, // one per block + bool perceptual); + +#pragma pack(push, 1) + struct ds_param_struct + { + uint32_t m_total_blocks; + int m_perceptual; + }; +#pragma pack(pop) + + bool opencl_determine_selectors( + opencl_context_ptr pContext, + const color_rgba* pInput_etc_color5_and_inten, + etc_block* pOutput_blocks, + bool perceptual); + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_pvrtc1_4.h b/Source/ThirdParty/basis_universal/encoder/basisu_pvrtc1_4.h new file mode 100644 index 000000000..80ca03b68 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_pvrtc1_4.h @@ -0,0 +1,465 @@ +// basisu_pvrtc1_4.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_gpu_texture.h" + +namespace basisu +{ + enum + { + PVRTC2_MIN_WIDTH = 16, + PVRTC2_MIN_HEIGHT = 8, + PVRTC4_MIN_WIDTH = 8, + PVRTC4_MIN_HEIGHT = 8 + }; + + struct pvrtc4_block + { + uint32_t m_modulation; + uint32_t m_endpoints; + + pvrtc4_block() : m_modulation(0), m_endpoints(0) { } + + inline bool operator== (const pvrtc4_block& rhs) const + { + return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints); + } + + inline void clear() + { + m_modulation = 0; + m_endpoints = 0; + } + + inline bool get_block_uses_transparent_modulation() const + { + return (m_endpoints & 1) != 0; + } + + inline bool is_endpoint_opaque(uint32_t endpoint_index) const + { + static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U }; + return (m_endpoints & s_bitmasks[open_range_check(endpoint_index, 2U)]) != 0; + } + + // Returns raw endpoint or 8888 + color_rgba get_endpoint(uint32_t endpoint_index, bool unpack) const; + + color_rgba get_endpoint_5554(uint32_t endpoint_index) const; + + static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint) + { + static const uint32_t s_comp_prec[4][4] = + { + // R0 G0 B0 A0 R1 G1 B1 A1 + { 4, 4, 3, 3 }, { 4, 4, 4, 3 }, // transparent endpoint + + { 5, 5, 4, 0 }, { 5, 5, 5, 0 } // opaque endpoint + }; + return s_comp_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][open_range_check(c, 4U)]; + } + + static color_rgba get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint) + { + static const color_rgba s_color_prec[4] = + { + color_rgba(4, 4, 3, 3), color_rgba(4, 4, 4, 3), // transparent endpoint + color_rgba(5, 5, 4, 0), color_rgba(5, 5, 5, 0) // opaque endpoint + }; + return s_color_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)]; + } + + inline uint32_t get_modulation(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + return (m_modulation >> ((y * 4 + x) * 2)) & 3; + } + + 
inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 4)); + uint32_t n = (y * 4 + x) * 2; + m_modulation = (m_modulation & (~(3 << n))) | (s << n); + assert(get_modulation(x, y) == s); + } + + // Scaled by 8 + inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const + { + static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 }, { 0, 4, 4, 8 } }; + return s_block_scales[block_uses_transparent_modulation]; + } + + // Scaled by 8 + inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const + { + return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)]; + } + + inline void byte_swap() + { + m_modulation = byteswap32(m_modulation); + m_endpoints = byteswap32(m_endpoints); + } + + // opaque endpoints: 554, 555 + // transparent endpoints: 3443, 3444 + inline void set_endpoint_raw(uint32_t endpoint_index, const color_rgba& c, bool opaque_endpoint) + { + assert(endpoint_index < 2); + const uint32_t m = m_endpoints & 1; + uint32_t r = c[0], g = c[1], b = c[2], a = c[3]; + + uint32_t packed; + + if (opaque_endpoint) + { + if (!endpoint_index) + { + // 554 + // 1RRRRRGGGGGBBBBM + assert((r < 32) && (g < 32) && (b < 16)); + packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m; + } + else + { + // 555 + // 1RRRRRGGGGGBBBBB + assert((r < 32) && (g < 32) && (b < 32)); + packed = 0x8000 | (r << 10) | (g << 5) | b; + } + } + else + { + if (!endpoint_index) + { + // 3443 + // 0AAA RRRR GGGG BBBM + assert((r < 16) && (g < 16) && (b < 8) && (a < 8)); + packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m; + } + else + { + // 3444 + // 0AAA RRRR GGGG BBBB + assert((r < 16) && (g < 16) && (b < 16) && (a < 8)); + packed = (a << 12) | (r << 8) | (g << 4) | b; + } + } + + assert(packed <= 0xFFFF); + + if (endpoint_index) + m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16); + else + m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; + } 
+ }; + + typedef vector2D pvrtc4_block_vector2D; + + uint32_t pvrtc4_swizzle_uv(uint32_t XSize, uint32_t YSize, uint32_t XPos, uint32_t YPos); + + class pvrtc4_image + { + public: + inline pvrtc4_image() : + m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) + { + } + + inline pvrtc4_image(uint32_t width, uint32_t height) : + m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) + { + resize(width, height); + } + + inline void clear() + { + m_width = 0; + m_height = 0; + m_block_width = 0; + m_block_height = 0; + m_blocks.clear(); + m_uses_alpha = false; + } + + inline void resize(uint32_t width, uint32_t height) + { + if ((width == m_width) && (height == m_height)) + return; + + m_width = width; + m_height = height; + + m_block_width = (width + 3) >> 2; + m_block_height = (height + 3) >> 2; + + m_blocks.resize(m_block_width, m_block_height); + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + + inline uint32_t get_block_width() const { return m_block_width; } + inline uint32_t get_block_height() const { return m_block_height; } + + inline const pvrtc4_block_vector2D &get_blocks() const { return m_blocks; } + inline pvrtc4_block_vector2D &get_blocks() { return m_blocks; } + + inline uint32_t get_total_blocks() const { return m_block_width * m_block_height; } + + inline bool get_uses_alpha() const { return m_uses_alpha; } + inline void set_uses_alpha(bool uses_alpha) { m_uses_alpha = uses_alpha; } + + inline bool are_blocks_equal(const pvrtc4_image& rhs) const + { + return m_blocks == rhs.m_blocks; + } + + inline void set_to_black() + { +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + + memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes()); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + } + + inline bool 
get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const + { + return m_blocks(bx, by).get_block_uses_transparent_modulation(); + } + + inline bool is_endpoint_opaque(uint32_t bx, uint32_t by, uint32_t endpoint_index) const + { + return m_blocks(bx, by).is_endpoint_opaque(endpoint_index); + } + + color_rgba get_endpoint(uint32_t bx, uint32_t by, uint32_t endpoint_index, bool unpack) const + { + assert((bx < m_block_width) && (by < m_block_height)); + return m_blocks(bx, by).get_endpoint(endpoint_index, unpack); + } + + inline uint32_t get_modulation(uint32_t x, uint32_t y) const + { + assert((x < m_width) && (y < m_height)); + return m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3); + } + + // Returns true if the block uses transparent modulation. + bool get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const; + + color_rgba get_pixel(uint32_t x, uint32_t y, uint32_t m) const; + + inline color_rgba get_pixel(uint32_t x, uint32_t y) const + { + assert((x < m_width) && (y < m_height)); + return get_pixel(x, y, m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3)); + } + + void deswizzle() + { + pvrtc4_block_vector2D temp(m_blocks); + + for (uint32_t y = 0; y < m_block_height; y++) + for (uint32_t x = 0; x < m_block_width; x++) + m_blocks(x, y) = temp[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)]; + } + + void swizzle() + { + pvrtc4_block_vector2D temp(m_blocks); + + for (uint32_t y = 0; y < m_block_height; y++) + for (uint32_t x = 0; x < m_block_width; x++) + m_blocks[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)] = temp(x, y); + } + + void unpack_all_pixels(image& img) const + { + img.crop(m_width, m_height); + + for (uint32_t y = 0; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + img(x, y) = get_pixel(x, y); + } + + void unpack_block(image &dst, uint32_t block_x, uint32_t block_y) + { + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + dst(x, y) = get_pixel(block_x * 4 + x, 
block_y * 4 + y); + } + + inline int wrap_x(int x) const + { + return posmod(x, m_width); + } + + inline int wrap_y(int y) const + { + return posmod(y, m_height); + } + + inline int wrap_block_x(int bx) const + { + return posmod(bx, m_block_width); + } + + inline int wrap_block_y(int by) const + { + return posmod(by, m_block_height); + } + + inline vec2F get_interpolation_factors(uint32_t x, uint32_t y) const + { + // 0 1 2 3 + // 2 3 0 1 + // .5 .75 0 .25 + static const float s_interp[4] = { 2, 3, 0, 1 }; + return vec2F(s_interp[x & 3], s_interp[y & 3]); + } + + inline color_rgba interpolate(int x, int y, + const color_rgba& p, const color_rgba& q, + const color_rgba& r, const color_rgba& s) const + { + static const int s_interp[4] = { 2, 3, 0, 1 }; + const int u_interp = s_interp[x & 3]; + const int v_interp = s_interp[y & 3]; + + color_rgba result; + + for (uint32_t c = 0; c < 4; c++) + { + int t = p[c] * 4 + u_interp * ((int)q[c] - (int)p[c]); + int b = r[c] * 4 + u_interp * ((int)s[c] - (int)r[c]); + int v = t * 4 + v_interp * (b - t); + if (c < 3) + { + v >>= 1; + v += (v >> 5); + } + else + { + v += (v >> 4); + } + assert((v >= 0) && (v < 256)); + result[c] = static_cast(v); + } + + return result; + } + + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < m_width) && (y < m_height)); + return m_blocks(x >> 2, y >> 2).set_modulation(x & 3, y & 3, s); + } + + inline uint64_t map_pixel(uint32_t x, uint32_t y, const color_rgba& c, bool perceptual, bool alpha_is_significant, bool record = true) + { + color_rgba v[4]; + get_interpolated_colors(x, y, v); + + uint64_t best_dist = color_distance(perceptual, c, v[0], alpha_is_significant); + uint32_t best_v = 0; + for (uint32_t i = 1; i < 4; i++) + { + uint64_t dist = color_distance(perceptual, c, v[i], alpha_is_significant); + if (dist < best_dist) + { + best_dist = dist; + best_v = i; + } + } + + if (record) + set_modulation(x, y, best_v); + + return best_dist; + } + + inline uint64_t 
remap_pixels_influenced_by_endpoint(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant) + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3; xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += map_pixel(x, y, orig_img(x, y), perceptual, alpha_is_significant); + } + } + + return total_error; + } + + inline uint64_t evaluate_1x1_endpoint_error(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant, uint64_t threshold_error = 0) const + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3; xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += color_distance(perceptual, get_pixel(x, y), orig_img(x, y), alpha_is_significant); + + if ((threshold_error) && (total_error >= threshold_error)) + return total_error; + } + } + + return total_error; + } + + uint64_t local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual); + + inline uint64_t map_all_pixels(const image& img, bool perceptual, bool alpha_is_significant) + { + assert(m_width == img.get_width()); + assert(m_height == img.get_height()); + + uint64_t total_error = 0; + for (uint32_t y = 0; y < img.get_height(); y++) + for (uint32_t x = 0; x < img.get_width(); x++) + total_error += map_pixel(x, y, img(x, y), perceptual, alpha_is_significant); + + return total_error; + } + + public: + uint32_t m_width, m_height; + pvrtc4_block_vector2D m_blocks; + uint32_t m_block_width, m_block_height; + + bool m_uses_alpha; + }; + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_resampler.h b/Source/ThirdParty/basis_universal/encoder/basisu_resampler.h new file mode 100644 index 000000000..fc1918ec8 --- /dev/null +++ 
b/Source/ThirdParty/basis_universal/encoder/basisu_resampler.h @@ -0,0 +1,198 @@ +// basisu_resampler.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "../transcoder/basisu.h" + +#define BASISU_RESAMPLER_DEBUG_OPS (0) +#define BASISU_RESAMPLER_DEFAULT_FILTER "lanczos4" +#define BASISU_RESAMPLER_MAX_DIMENSION (16384) + +namespace basisu +{ + // float or double + typedef float Resample_Real; + + class Resampler + { + public: + typedef Resample_Real Sample; + + struct Contrib + { + Resample_Real weight; + uint16_t pixel; + }; + + struct Contrib_List + { + uint16_t n; + Contrib *p; + }; + + enum Boundary_Op + { + BOUNDARY_WRAP = 0, + BOUNDARY_REFLECT = 1, + BOUNDARY_CLAMP = 2 + }; + + enum Status + { + STATUS_OKAY = 0, + STATUS_OUT_OF_MEMORY = 1, + STATUS_BAD_FILTER_NAME = 2, + STATUS_SCAN_BUFFER_FULL = 3 + }; + + // src_x/src_y - Input dimensions + // dst_x/dst_y - Output dimensions + // boundary_op - How to sample pixels near the image boundaries + // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high + // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler + // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay) + Resampler( + int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op = BOUNDARY_CLAMP, + Resample_Real 
sample_low = 0.0f, Resample_Real sample_high = 0.0f, + const char *Pfilter_name = BASISU_RESAMPLER_DEFAULT_FILTER, + Contrib_List *Pclist_x = NULL, + Contrib_List *Pclist_y = NULL, + Resample_Real filter_x_scale = 1.0f, + Resample_Real filter_y_scale = 1.0f, + Resample_Real src_x_ofs = 0.0f, + Resample_Real src_y_ofs = 0.0f); + + ~Resampler(); + + // Reinits resampler so it can handle another frame. + void restart(); + + // false on out of memory. + bool put_line(const Sample *Psrc); + + // NULL if no scanlines are currently available (give the resampler more scanlines!) + const Sample *get_line(); + + Status status() const + { + return m_status; + } + + // Returned contributor lists can be shared with another Resampler. + void get_clists(Contrib_List **ptr_clist_x, Contrib_List **ptr_clist_y); + Contrib_List *get_clist_x() const + { + return m_Pclist_x; + } + Contrib_List *get_clist_y() const + { + return m_Pclist_y; + } + + // Filter accessors. + static int get_filter_num(); + static const char *get_filter_name(int filter_num); + + static Contrib_List *make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real(*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs); + + static void free_clist(Contrib_List* p) { if (p) { free(p->p); free(p); } } + + private: + Resampler(); + Resampler(const Resampler &o); + Resampler &operator=(const Resampler &o); + +#ifdef BASISU_RESAMPLER_DEBUG_OPS + int total_ops; +#endif + + int m_intermediate_x; + + int m_resample_src_x; + int m_resample_src_y; + int m_resample_dst_x; + int m_resample_dst_y; + + Boundary_Op m_boundary_op; + + Sample *m_Pdst_buf; + Sample *m_Ptmp_buf; + + Contrib_List *m_Pclist_x; + Contrib_List *m_Pclist_y; + + bool m_clist_x_forced; + bool m_clist_y_forced; + + bool m_delay_x_resample; + + int *m_Psrc_y_count; + uint8_t *m_Psrc_y_flag; + + // The maximum number of scanlines that can be buffered at one time. 
+ enum + { + MAX_SCAN_BUF_SIZE = BASISU_RESAMPLER_MAX_DIMENSION + }; + + struct Scan_Buf + { + int scan_buf_y[MAX_SCAN_BUF_SIZE]; + Sample *scan_buf_l[MAX_SCAN_BUF_SIZE]; + }; + + Scan_Buf *m_Pscan_buf; + + int m_cur_src_y; + int m_cur_dst_y; + + Status m_status; + + void resample_x(Sample *Pdst, const Sample *Psrc); + void scale_y_mov(Sample *Ptmp, const Sample *Psrc, Resample_Real weight, int dst_x); + void scale_y_add(Sample *Ptmp, const Sample *Psrc, Resample_Real weight, int dst_x); + void clamp(Sample *Pdst, int n); + void resample_y(Sample *Pdst); + + static int reflect(const int j, const int src_x, const Boundary_Op boundary_op); + + inline int count_ops(Contrib_List *Pclist, int k) + { + int i, t = 0; + for (i = 0; i < k; i++) + t += Pclist[i].n; + return (t); + } + + Resample_Real m_lo; + Resample_Real m_hi; + + inline Resample_Real clamp_sample(Resample_Real f) const + { + if (f < m_lo) + f = m_lo; + else if (f > m_hi) + f = m_hi; + return f; + } + }; + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_resampler_filters.h b/Source/ThirdParty/basis_universal/encoder/basisu_resampler_filters.h new file mode 100644 index 000000000..9d21f950d --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_resampler_filters.h @@ -0,0 +1,47 @@ +// basisu_resampler_filters.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "../transcoder/basisu.h" + +namespace basisu +{ + typedef float (*resample_filter_func)(float t); + + struct resample_filter + { + const char *name; + resample_filter_func func; + float support; + }; + + extern const resample_filter g_resample_filters[]; + extern const int g_num_resample_filters; + + const float BASISU_BOX_FILTER_SUPPORT = 0.5f; + float box_filter(float t); /* pulse/Fourier window */ + + const float BASISU_TENT_FILTER_SUPPORT = 1.0f; + float tent_filter(float t); /* box (*) box, bilinear/triangle */ + + const float BASISU_GAUSSIAN_FILTER_SUPPORT = 1.25f; + float gaussian_filter(float t); // with blackman window + + const float BASISU_BELL_FILTER_SUPPORT = 1.5f; + float bell_filter(float t); /* box (*) box (*) box */ + + int find_resample_filter(const char *pName); + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_ssim.h b/Source/ThirdParty/basis_universal/encoder/basisu_ssim.h new file mode 100644 index 000000000..51cd2d78f --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_ssim.h @@ -0,0 +1,44 @@ +// basisu_ssim.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_enc.h" + +namespace basisu +{ + float gauss(int x, int y, float sigma_sqr); + + enum + { + cComputeGaussianFlagNormalize = 1, + cComputeGaussianFlagPrint = 2, + cComputeGaussianFlagNormalizeCenterToOne = 4 + }; + + void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags = 0); + + void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift); + void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst); + void add_image(const imagef &src1, const imagef &src2, imagef &dst); + void adds_image(const imagef &src, const vec4F &value, imagef &dst); + void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale); + void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale); + vec4F avg_image(const imagef &src); + + void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping = false, uint32_t width_divisor = 1, uint32_t height_divisor = 1); + + vec4F compute_ssim(const imagef &a, const imagef &b); + vec4F compute_ssim(const image &a, const image &b, bool luma, bool luma_601); + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_uastc_enc.h b/Source/ThirdParty/basis_universal/encoder/basisu_uastc_enc.h new file mode 100644 index 000000000..7cd3c622e --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_uastc_enc.h @@ -0,0 +1,140 @@ +// basisu_uastc_enc.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_etc.h" + +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + const uint32_t TOTAL_PACK_UASTC_LEVELS = 5; + + enum + { + // Fastest is the lowest quality, although it's stil substantially higher quality vs. BC1/ETC1. It supports 5 modes. + // The output may be somewhat blocky because this setting doesn't support 2/3-subset UASTC modes, but it should be less blocky vs. BC1/ETC1. + // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. + // Transcoded ETC1 quality will be lower because it only considers 2 hints out of 32. + // Avg. 43.45 dB + cPackUASTCLevelFastest = 0, + + // Faster is ~3x slower than fastest. It supports 9 modes. + // Avg. 46.49 dB + cPackUASTCLevelFaster = 1, + + // Default is ~5.5x slower than fastest. It supports 14 modes. + // Avg. 47.47 dB + cPackUASTCLevelDefault = 2, + + // Slower is ~14.5x slower than fastest. It supports all 18 modes. + // Avg. 48.01 dB + cPackUASTCLevelSlower = 3, + + // VerySlow is ~200x slower than fastest. + // The best quality the codec is capable of, but you'll need to be patient or have a lot of cores. + // Avg. 48.24 dB + cPackUASTCLevelVerySlow = 4, + + cPackUASTCLevelMask = 0xF, + + // By default the encoder tries to strike a balance between UASTC and transcoded BC7 quality. + // These flags allow you to favor only optimizing for lowest UASTC error, or lowest BC7 error. 
+ cPackUASTCFavorUASTCError = 8, + cPackUASTCFavorBC7Error = 16, + + cPackUASTCETC1FasterHints = 64, + cPackUASTCETC1FastestHints = 128, + cPackUASTCETC1DisableFlipAndIndividual = 256, + + // Favor UASTC modes 0 and 10 more than the others (this is experimental, it's useful for RDO compression) + cPackUASTCFavorSimplerModes = 512, + }; + + // pRGBAPixels: Pointer to source 4x4 block of RGBA pixels (R first in memory). + // block: Reference to destination UASTC block. + // level: Controls compression speed vs. performance tradeoff. + void encode_uastc(const uint8_t* pRGBAPixels, basist::uastc_block& output_block, uint32_t flags = cPackUASTCLevelDefault); + + struct uastc_encode_results + { + uint32_t m_uastc_mode; + uint32_t m_common_pattern; + basist::astc_block_desc m_astc; + color_rgba m_solid_color; + uint64_t m_astc_err; + }; + + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1); + + const uint32_t UASCT_RDO_DEFAULT_LZ_DICT_SIZE = 4096; + + const float UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO = 10.0f; + const float UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH = 8.0f; + + // The RDO encoder computes a smoothness factor, from [0,1], for each block. To do this it computes each block's maximum component variance, then it divides this by this factor and clamps the result. + // Larger values will result in more blocks being protected from too much distortion. + const float UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV = 18.0f; + + // The RDO encoder can artifically boost the error of smooth blocks, in order to suppress distortions on smooth areas of the texture. + // The encoder will use this value as the maximum error scale to use on smooth blocks. The larger this value, the better smooth bocks will look. Set to 1.0 to disable this completely. 
+ const float UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE = 10.0f; + + struct uastc_rdo_params + { + uastc_rdo_params() + { + clear(); + } + + void clear() + { + m_lz_dict_size = UASCT_RDO_DEFAULT_LZ_DICT_SIZE; + m_lambda = 0.5f; + m_max_allowed_rms_increase_ratio = UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO; + m_skip_block_rms_thresh = UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH; + m_endpoint_refinement = true; + m_lz_literal_cost = 100; + + m_max_smooth_block_std_dev = UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV; + m_smooth_block_max_error_scale = UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE; + } + + // m_lz_dict_size: Size of LZ dictionary to simulate in bytes. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. + uint32_t m_lz_dict_size; + + // m_lambda: The post-processor tries to reduce distortion+rate*lambda (rate is approximate LZ bits and distortion is scaled MS error). + // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. + float m_lambda; + + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc. + float m_max_allowed_rms_increase_ratio; + + // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. + float m_skip_block_rms_thresh; + + // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. + bool m_endpoint_refinement; + + float m_max_smooth_block_std_dev; + float m_smooth_block_max_error_scale; + + uint32_t m_lz_literal_cost; + }; + + // num_blocks, pBlocks: Number of blocks and pointer to UASTC blocks to process. + // pBlock_pixels: Pointer to an array of 4x4 blocks containing the original texture pixels. This is NOT a raster image, but a pointer to individual 4x4 blocks. 
+ // flags: Pass in the same flags used to encode the UASTC blocks. The flags are used to reencode the transcode hints in the same way. + bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params ¶ms, uint32_t flags = cPackUASTCLevelDefault, job_pool* pJob_pool = nullptr, uint32_t total_jobs = 0); +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.h b/Source/ThirdParty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.h new file mode 100644 index 000000000..e0a121c56 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.h @@ -0,0 +1,180 @@ +// basisu_uastc_hdr_4x4_enc.h +#pragma once +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_astc_helpers.h" +#include "../transcoder/basisu_astc_hdr_core.h" +#include "basisu_astc_hdr_common.h" + +namespace basisu +{ + struct uastc_hdr_4x4_codec_options : astc_hdr_codec_base_options + { + float m_bc6h_err_weight; + + bool m_use_solid; + + bool m_use_mode11_part1; + bool m_mode11_uber_mode; + uint32_t m_first_mode11_weight_ise_range; + uint32_t m_last_mode11_weight_ise_range; + bool m_mode11_direct_only; + int32_t m_first_mode11_submode; + int32_t m_last_mode11_submode; + + bool m_use_mode7_part1; + uint32_t m_first_mode7_part1_weight_ise_range; + uint32_t m_last_mode7_part1_weight_ise_range; + + bool m_use_mode7_part2; + uint32_t m_mode7_part2_part_masks; + uint32_t m_first_mode7_part2_weight_ise_range; + uint32_t m_last_mode7_part2_weight_ise_range; + + bool m_use_mode11_part2; + uint32_t m_mode11_part2_part_masks; + uint32_t m_first_mode11_part2_weight_ise_range; + uint32_t m_last_mode11_part2_weight_ise_range; + + bool m_refine_weights; + + uint32_t m_level; + + bool m_use_estimated_partitions; + uint32_t m_max_estimated_partitions; + + uastc_hdr_4x4_codec_options(); + + void init(); + + // TODO: set_quality_level() is preferred to 
configure the codec for transcoding purposes. + static const int cMinLevel = 0; + static const int cMaxLevel = 4; + static const int cDefaultLevel = 1; + void set_quality_level(int level); + + private: + void set_quality_best(); + void set_quality_normal(); + void set_quality_fastest(); + }; + + struct astc_hdr_4x4_pack_results + { + double m_best_block_error; + double m_bc6h_block_error; // note this is not used/set by the encoder, here for convienance + + // Encoder results (logical ASTC block) + astc_helpers::log_astc_block m_best_blk; + + // For statistical use + uint32_t m_best_submodes[2]; + uint32_t m_best_pat_index; + bool m_constrained_weights; + + bool m_improved_via_refinement_flag; + + // Only valid if the block is solid + basist::astc_blk m_solid_blk; + + // The BC6H transcoded block + basist::bc6h_block m_bc6h_block; + + // Solid color/void extent flag + bool m_is_solid; + + void clear() + { + m_best_block_error = 1e+30f; + m_bc6h_block_error = 1e+30f; + + m_best_blk.clear(); + m_best_blk.m_grid_width = 4; + m_best_blk.m_grid_height = 4; + m_best_blk.m_endpoint_ise_range = 20; // 0-255 + + clear_obj(m_best_submodes); + + m_best_pat_index = 0; + m_constrained_weights = false; + + clear_obj(m_bc6h_block); + + m_is_solid = false; + m_improved_via_refinement_flag = false; + } + }; + + // Encodes a 4x4 ASTC HDR block given a 4x4 array of source block pixels/texels. + // Supports solid color blocks, mode 11 (all submodes), mode 7/1 partition (all submodes), + // and mode 7/2 partitions (all submodes) - 30 patterns, only the ones also in common with the BC6H format. + // The packed ASTC weight grid dimensions are currently always 4x4 texels, but may be also 3x3 in the future. + // This function is thread safe, i.e. it may be called from multiple encoding threads simultanously with different blocks. 
+ // + // Parameters: + // pRGBPixels - An array of 48 (16 RGB) floats: the 4x4 block to pack + // pPacked_block - A pointer to the packed ASTC HDR block + // coptions - Codec options + // pInternal_results - An optional pointer to details about how the block was packed, for statistics/debugging purposes. May be nullptr. + // + // Requirements: + // astc_hdr_enc_init() MUST have been called first to initialized the codec. + // Input pixels are checked and cannot be NaN's, Inf's, signed, or too large (greater than MAX_HALF_FLOAT, or 65504). + // Normal values and denormals are okay. + bool astc_hdr_4x4_enc_block( + const float* pRGBPixels, const basist::half_float *pRGBPixelsHalf, + const uastc_hdr_4x4_codec_options& coptions, + basisu::vector &all_results); + + bool astc_hdr_4x4_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_4x4_pack_results& results); + + bool astc_hdr_4x4_refine_weights(const basist::half_float* pSource_block, astc_hdr_4x4_pack_results& cur_results, const uastc_hdr_4x4_codec_options& coptions, float bc6h_weight, bool* pImproved_flag); + + struct astc_hdr_4x4_block_stats + { + std::mutex m_mutex; + + uint32_t m_total_blocks; + uint32_t m_total_2part, m_total_solid; + uint32_t m_total_mode7_1part, m_total_mode7_2part; + uint32_t m_total_mode11_1part, m_total_mode11_2part; + uint32_t m_total_mode11_1part_constrained_weights; + + uint32_t m_weight_range_hist_7[11]; + uint32_t m_weight_range_hist_7_2part[11]; + uint32_t m_mode7_submode_hist[6]; + + uint32_t m_weight_range_hist_11[11]; + uint32_t m_weight_range_hist_11_2part[11]; + uint32_t m_mode11_submode_hist[9]; + + uint32_t m_part_hist[32]; + + uint32_t m_total_refined; + + astc_hdr_4x4_block_stats() { clear(); } + + void clear() + { + std::lock_guard lck(m_mutex); + + m_total_blocks = 0; + m_total_mode7_1part = 0, m_total_mode7_2part = 0, m_total_mode11_1part = 0, m_total_2part = 0, m_total_solid = 0, m_total_mode11_2part = 0; + m_total_mode11_1part_constrained_weights = 0; + 
m_total_refined = 0; + + clear_obj(m_weight_range_hist_11); + clear_obj(m_weight_range_hist_11_2part); + clear_obj(m_weight_range_hist_7); + clear_obj(m_weight_range_hist_7_2part); + clear_obj(m_mode7_submode_hist); + clear_obj(m_mode11_submode_hist); + clear_obj(m_part_hist); + } + + void update(const astc_hdr_4x4_pack_results& log_blk); + + void print(); + }; + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_wasm_api.h b/Source/ThirdParty/basis_universal/encoder/basisu_wasm_api.h new file mode 100644 index 000000000..92266bc41 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_wasm_api.h @@ -0,0 +1,58 @@ +// File: basisu_wasm_api.h +#pragma once +#include "basisu_wasm_api_common.h" + +BU_WASM_EXPORT("bu_get_version") +uint32_t bu_get_version(); + +BU_WASM_EXPORT("bu_enable_debug_printf") +void bu_enable_debug_printf(uint32_t flag); + +BU_WASM_EXPORT("bu_init") +void bu_init(); + +BU_WASM_EXPORT("bu_alloc") +uint64_t bu_alloc(uint64_t size); + +BU_WASM_EXPORT("bu_free") +void bu_free(uint64_t ofs); + +BU_WASM_EXPORT("bu_new_comp_params") +uint64_t bu_new_comp_params(); + +BU_WASM_EXPORT("bu_delete_comp_params") +wasm_bool_t bu_delete_comp_params(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_get_comp_data_size") +uint64_t bu_comp_params_get_comp_data_size(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_get_comp_data_ofs") +uint64_t bu_comp_params_get_comp_data_ofs(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_clear") +wasm_bool_t bu_comp_params_clear(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_set_image_rgba32") +wasm_bool_t bu_comp_params_set_image_rgba32( + uint64_t params_ofs, + uint32_t image_index, + uint64_t img_data_ofs, + uint32_t width, uint32_t height, + uint32_t pitch_in_bytes); + +BU_WASM_EXPORT("bu_comp_params_set_image_float_rgba") +wasm_bool_t bu_comp_params_set_image_float_rgba( + uint64_t params_ofs, + uint32_t image_index, + uint64_t img_data_ofs, + 
uint32_t width, uint32_t height, + uint32_t pitch_in_bytes); + +BU_WASM_EXPORT("bu_compress_texture") +wasm_bool_t bu_compress_texture( + uint64_t params_ofs, + uint32_t desired_basis_tex_format, + int quality_level, int effort_level, + uint64_t flags_and_quality, + float low_level_uastc_rdo_or_dct_quality); + diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_wasm_api_common.h b/Source/ThirdParty/basis_universal/encoder/basisu_wasm_api_common.h new file mode 100644 index 000000000..d3fe1ae39 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_wasm_api_common.h @@ -0,0 +1,156 @@ +// File: basisu_wasm_api_common.h +#pragma once +#include "stdint.h" + +#if defined(__wasm__) + #if defined(__cplusplus) + #define BU_WASM_EXPORT(name) __attribute__((export_name(name))) extern "C" + #else + #define BU_WASM_EXPORT(name) __attribute__((export_name(name))) + #endif +#elif defined(__cplusplus) + #define BU_WASM_EXPORT(name) extern "C" +#else + #define BU_WASM_EXPORT(name) +#endif + +// wasm_bool_t is an alias for uint32_t +typedef uint32_t wasm_bool_t; + +// Compression constants + +#define BU_QUALITY_MIN 0 +#define BU_QUALITY_MAX 100 + +#define BU_EFFORT_MIN 0 +#define BU_EFFORT_MAX 10 +#define BU_EFFORT_SUPER_FAST = 0 +#define BU_EFFORT_FAST = 2 +#define BU_EFFORT_NORMAL = 5 +#define BU_EFFORT_DEFAULT = 2 +#define BU_EFFORT_SLOW = 8 +#define BU_EFFORT_VERY_SLOW = 10 + +#define BU_COMP_FLAGS_NONE (0) +#define BU_COMP_FLAGS_USE_OPENCL (1 << 8 ) +#define BU_COMP_FLAGS_THREADED (1 << 9 ) +#define BU_COMP_FLAGS_DEBUG_OUTPUT (1 << 10) +#define BU_COMP_FLAGS_KTX2_OUTPUT (1 << 11) +#define BU_COMP_FLAGS_KTX2_UASTC_ZSTD (1 << 12) +#define BU_COMP_FLAGS_SRGB (1 << 13) +#define BU_COMP_FLAGS_GEN_MIPS_CLAMP (1 << 14) +#define BU_COMP_FLAGS_GEN_MIPS_WRAP (1 << 15) +#define BU_COMP_FLAGS_Y_FLIP (1 << 16) +#define BU_COMP_FLAGS_PRINT_STATS (1 << 18) +#define BU_COMP_FLAGS_PRINT_STATUS (1 << 19) +#define BU_COMP_FLAGS_DEBUG_IMAGES (1 << 20) +#define 
BU_COMP_FLAGS_REC2020 (1 << 21) +#define BU_COMP_FLAGS_VALIDATE_OUTPUT (1 << 22) + +#define BU_COMP_FLAGS_XUASTC_LDR_FULL_ARITH (0) +#define BU_COMP_FLAGS_XUASTC_LDR_HYBRID (1 << 23) +#define BU_COMP_FLAGS_XUASTC_LDR_FULL_ZSTD (2 << 23) +#define BU_COMP_FLAGS_XUASTC_LDR_SYNTAX_SHIFT (23) +#define BU_COMP_FLAGS_XUASTC_LDR_SYNTAX_MASK (3) + +#define BU_COMP_FLAGS_TEXTURE_TYPE_2D (0 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_2D_ARRAY (1 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_CUBEMAP_ARRAY (2 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_VIDEO_FRAMES (3 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_SHIFT (25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_MASK (3) + +#define BU_COMP_FLAGS_VERBOSE (BU_COMP_FLAGS_DEBUG_OUTPUT | BU_COMP_FLAGS_PRINT_STATS | BU_COMP_FLAGS_PRINT_STATUS) + +// basist::basis_tex_format: the supported .ktx2 (and .basis) file format types +#define BTF_ETC1S 0 +#define BTF_UASTC_LDR_4X4 1 +#define BTF_UASTC_HDR_4X4 2 +#define BTF_ASTC_HDR_6X6 3 +#define BTF_UASTC_HDR_6X6 4 +#define BTF_XUASTC_LDR_4X4 5 +#define BTF_XUASTC_LDR_5X4 6 +#define BTF_XUASTC_LDR_5X5 7 +#define BTF_XUASTC_LDR_6X5 8 +#define BTF_XUASTC_LDR_6X6 9 +#define BTF_XUASTC_LDR_8X5 10 +#define BTF_XUASTC_LDR_8X6 11 +#define BTF_XUASTC_LDR_10X5 12 +#define BTF_XUASTC_LDR_10X6 13 +#define BTF_XUASTC_LDR_8X8 14 +#define BTF_XUASTC_LDR_10X8 15 +#define BTF_XUASTC_LDR_10X10 16 +#define BTF_XUASTC_LDR_12X10 17 +#define BTF_XUASTC_LDR_12X12 18 +#define BTF_ASTC_LDR_4X4 19 +#define BTF_ASTC_LDR_5X4 20 +#define BTF_ASTC_LDR_5X5 21 +#define BTF_ASTC_LDR_6X5 22 +#define BTF_ASTC_LDR_6X6 23 +#define BTF_ASTC_LDR_8X5 24 +#define BTF_ASTC_LDR_8X6 25 +#define BTF_ASTC_LDR_10X5 26 +#define BTF_ASTC_LDR_10X6 27 +#define BTF_ASTC_LDR_8X8 28 +#define BTF_ASTC_LDR_10X8 29 +#define BTF_ASTC_LDR_10X10 30 +#define BTF_ASTC_LDR_12X10 31 +#define BTF_ASTC_LDR_12X12 32 +#define BTF_TOTAL_FORMATS 33 + +// Transcoding constants + +// basist::transcoder_texture_format: the supported transcode GPU texture formats +#define 
TF_ETC1_RGB 0 +#define TF_ETC2_RGBA 1 +#define TF_BC1_RGB 2 +#define TF_BC3_RGBA 3 +#define TF_BC4_R 4 +#define TF_BC5_RG 5 +#define TF_BC7_RGBA 6 +#define TF_PVRTC1_4_RGB 8 +#define TF_PVRTC1_4_RGBA 9 +#define TF_ASTC_LDR_4X4_RGBA 10 +#define TF_ATC_RGB 11 +#define TF_ATC_RGBA 12 +#define TF_FXT1_RGB 17 +#define TF_PVRTC2_4_RGB 18 +#define TF_PVRTC2_4_RGBA 19 +#define TF_ETC2_EAC_R11 20 +#define TF_ETC2_EAC_RG11 21 +#define TF_BC6H 22 +#define TF_ASTC_HDR_4X4_RGBA 23 +#define TF_RGBA32 13 +#define TF_RGB565 14 +#define TF_BGR565 15 +#define TF_RGBA4444 16 +#define TF_RGB_HALF 24 +#define TF_RGBA_HALF 25 +#define TF_RGB_9E5 26 +#define TF_ASTC_HDR_6X6_RGBA 27 +#define TF_ASTC_LDR_5X4_RGBA 28 +#define TF_ASTC_LDR_5X5_RGBA 29 +#define TF_ASTC_LDR_6X5_RGBA 30 +#define TF_ASTC_LDR_6X6_RGBA 31 +#define TF_ASTC_LDR_8X5_RGBA 32 +#define TF_ASTC_LDR_8X6_RGBA 33 +#define TF_ASTC_LDR_10X5_RGBA 34 +#define TF_ASTC_LDR_10X6_RGBA 35 +#define TF_ASTC_LDR_8X8_RGBA 36 +#define TF_ASTC_LDR_10X8_RGBA 37 +#define TF_ASTC_LDR_10X10_RGBA 38 +#define TF_ASTC_LDR_12X10_RGBA 39 +#define TF_ASTC_LDR_12X12_RGBA 40 +#define TF_TOTAL_TEXTURE_FORMATS 41 + +// basist::basisu_decode_flags: Transcode decode flags (bt_ktx2_transcode_image_level decode_flags parameter, logically OR'd) +#define DECODE_FLAGS_PVRTC_DECODE_TO_NEXT_POW2 2 +#define DECODE_FLAGS_TRANSCODE_ALPHA_DATA_TO_OPAQUE_FORMATS 4 +#define DECODE_FLAGS_BC1_FORBID_THREE_COLOR_BLOCKS 8 +#define DECODE_FLAGS_OUTPUT_HAS_ALPHA_INDICES 16 +#define DECODE_FLAGS_HIGH_QUALITY 32 +#define DECODE_FLAGS_NO_ETC1S_CHROMA_FILTERING 64 +#define DECODE_FLAGS_NO_DEBLOCK_FILTERING 128 +#define DECODE_FLAGS_STRONGER_DEBLOCK_FILTERING 256 +#define DECODE_FLAGS_FORCE_DEBLOCK_FILTERING 512 +#define DECODE_FLAGS_XUASTC_LDR_DISABLE_FAST_BC7_TRANSCODING 1024 diff --git a/Source/ThirdParty/basis_universal/encoder/basisu_wasm_transcoder_api.h b/Source/ThirdParty/basis_universal/encoder/basisu_wasm_transcoder_api.h new file mode 100644 index 000000000..a7389acee 
--- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/basisu_wasm_transcoder_api.h @@ -0,0 +1,216 @@ +// File: basisu_wasm_transcoder_api.h - Transcoding API support for WASM WASI modules and Python native support. +#pragma once +#include "basisu_wasm_api_common.h" + +// High-level functions + +BU_WASM_EXPORT("bt_get_version") +uint32_t bt_get_version(); + +BU_WASM_EXPORT("bt_enable_debug_printf") +void bt_enable_debug_printf(uint32_t flag); + +BU_WASM_EXPORT("bt_init") +void bt_init(); + +BU_WASM_EXPORT("bt_alloc") +uint64_t bt_alloc(uint64_t size); + +BU_WASM_EXPORT("bt_free") +void bt_free(uint64_t ofs); + +// basis_tex_format helpers + +BU_WASM_EXPORT("bt_basis_tex_format_is_xuastc_ldr") +wasm_bool_t bt_basis_tex_format_is_xuastc_ldr(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_is_astc_ldr") +wasm_bool_t bt_basis_tex_format_is_astc_ldr(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_get_block_width") +uint32_t bt_basis_tex_format_get_block_width(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_get_block_height") +uint32_t bt_basis_tex_format_get_block_height(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_is_hdr") +wasm_bool_t bt_basis_tex_format_is_hdr(uint32_t basis_tex_format_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_is_ldr") +wasm_bool_t bt_basis_tex_format_is_ldr(uint32_t basis_tex_format_u32); + +// transcoder_texture_format helpers + +BU_WASM_EXPORT("bt_basis_get_bytes_per_block_or_pixel") +uint32_t bt_basis_get_bytes_per_block_or_pixel(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_format_has_alpha") +wasm_bool_t bt_basis_transcoder_format_has_alpha(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_hdr") +wasm_bool_t bt_basis_transcoder_format_is_hdr(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_ldr") +wasm_bool_t 
bt_basis_transcoder_format_is_ldr(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_texture_format_is_astc") +wasm_bool_t bt_basis_transcoder_texture_format_is_astc(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_uncompressed") +wasm_bool_t bt_basis_transcoder_format_is_uncompressed(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_uncompressed_bytes_per_pixel") +uint32_t bt_basis_get_uncompressed_bytes_per_pixel(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_block_width") +uint32_t bt_basis_get_block_width(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_block_height") +uint32_t bt_basis_get_block_height(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_transcoder_texture_format_from_basis_tex_format") +uint32_t bt_basis_get_transcoder_texture_format_from_basis_tex_format(uint32_t basis_tex_format_u32); + +BU_WASM_EXPORT("bt_basis_is_format_supported") +wasm_bool_t bt_basis_is_format_supported(uint32_t transcoder_texture_format_u32, uint32_t basis_tex_format_u32); + +BU_WASM_EXPORT("bt_basis_compute_transcoded_image_size_in_bytes") +uint32_t bt_basis_compute_transcoded_image_size_in_bytes(uint32_t transcoder_texture_format_u32, uint32_t orig_width, uint32_t orig_height); + +// Transcoding +BU_WASM_EXPORT("bt_ktx2_open") +uint64_t bt_ktx2_open(uint64_t data_mem_ofs, uint32_t data_len); + +BU_WASM_EXPORT("bt_ktx2_close") +void bt_ktx2_close(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_width") +uint32_t bt_ktx2_get_width(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_height") +uint32_t bt_ktx2_get_height(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_levels") +uint32_t bt_ktx2_get_levels(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_faces") +uint32_t bt_ktx2_get_faces(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_layers") +uint32_t bt_ktx2_get_layers(uint64_t handle); + 
+BU_WASM_EXPORT("bt_ktx2_get_basis_tex_format") +uint32_t bt_ktx2_get_basis_tex_format(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_etc1s") +wasm_bool_t bt_ktx2_is_etc1s(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_uastc_ldr_4x4") +wasm_bool_t bt_ktx2_is_uastc_ldr_4x4(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_hdr") +wasm_bool_t bt_ktx2_is_hdr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_hdr_4x4") +wasm_bool_t bt_ktx2_is_hdr_4x4(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_hdr_6x6") +wasm_bool_t bt_ktx2_is_hdr_6x6(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_ldr") +wasm_bool_t bt_ktx2_is_ldr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_astc_ldr") +wasm_bool_t bt_ktx2_is_astc_ldr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_xuastc_ldr") +wasm_bool_t bt_ktx2_is_xuastc_ldr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_block_width") +uint32_t bt_ktx2_get_block_width(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_block_height") +uint32_t bt_ktx2_get_block_height(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_has_alpha") +wasm_bool_t bt_ktx2_has_alpha(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_color_model") +uint32_t bt_ktx2_get_dfd_color_model(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_color_primaries") +uint32_t bt_ktx2_get_dfd_color_primaries(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_transfer_func") +uint32_t bt_ktx2_get_dfd_transfer_func(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_srgb") +wasm_bool_t bt_ktx2_is_srgb(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_flags") +uint32_t bt_ktx2_get_dfd_flags(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_total_samples") +uint32_t bt_ktx2_get_dfd_total_samples(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_channel_id0") +uint32_t bt_ktx2_get_dfd_channel_id0(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_channel_id1") +uint32_t bt_ktx2_get_dfd_channel_id1(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_video") +wasm_bool_t 
bt_ktx2_is_video(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier") +float bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_level_orig_width") +uint32_t bt_ktx2_get_level_orig_width(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_orig_height") +uint32_t bt_ktx2_get_level_orig_height(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_actual_width") +uint32_t bt_ktx2_get_level_actual_width(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_actual_height") +uint32_t bt_ktx2_get_level_actual_height(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_num_blocks_x") +uint32_t bt_ktx2_get_level_num_blocks_x(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_num_blocks_y") +uint32_t bt_ktx2_get_level_num_blocks_y(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_total_blocks") +uint32_t bt_ktx2_get_level_total_blocks(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_alpha_flag") +wasm_bool_t bt_ktx2_get_level_alpha_flag(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_iframe_flag") +wasm_bool_t bt_ktx2_get_level_iframe_flag(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_start_transcoding") +wasm_bool_t bt_ktx2_start_transcoding(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_create_transcode_state") +uint64_t bt_ktx2_create_transcode_state(); + 
+BU_WASM_EXPORT("bt_ktx2_destroy_transcode_state") +void bt_ktx2_destroy_transcode_state(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_transcode_image_level") +wasm_bool_t bt_ktx2_transcode_image_level( + uint64_t ktx2_handle, // handle to KTX2 file, see bt_ktx2_open() + uint32_t level_index, uint32_t layer_index, uint32_t face_index, // KTX2 level/layer/face to transcode + uint64_t output_block_mem_ofs, // allocate using bt_alloc() + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t transcoder_texture_format_u32, // target format, TF_ETC1_RGB etc. + uint32_t decode_flags, // DECODE_FLAGS_ + uint32_t output_row_pitch_in_blocks_or_pixels, // can be 0 + uint32_t output_rows_in_pixels, // can be 0 + int channel0, int channel1, // both default to -1 + uint64_t state_handle); // thread local state: can be 0, or bt_ktx2_create_transcode_state() + diff --git a/Source/ThirdParty/basis_universal/encoder/cppspmd_flow.h b/Source/ThirdParty/basis_universal/encoder/cppspmd_flow.h new file mode 100644 index 000000000..3e83e9eda --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/cppspmd_flow.h @@ -0,0 +1,591 @@ +// Do not include this header directly. +// Control flow functionality in common between all the headers. +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifdef _DEBUG +CPPSPMD_FORCE_INLINE void spmd_kernel::check_masks() +{ + assert(!any(andnot(m_kernel_exec, m_exec))); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_break() +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + m_exec = exec_mask::all_off(); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_continue() +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + // Kill any active lanes, and remember which lanes were active so we can re-enable them at the end of the loop body. + m_continue_mask = m_continue_mask | m_exec; + m_exec = exec_mask::all_off(); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_return() +{ + // Permenantly kill all active lanes + m_kernel_exec = andnot(m_exec, m_kernel_exec); + m_exec = exec_mask::all_off(); +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaskedBody) +{ + exec_mask orig_exec = m_exec, orig_kernel_exec = m_kernel_exec; + + m_kernel_exec = exec_mask::all_on(); + m_exec = exec_mask::all_on(); + + unmaskedBody(); + + m_kernel_exec = m_kernel_exec & orig_kernel_exec; + m_exec = m_exec & orig_exec; + + check_masks(); +} + +struct scoped_unmasked_restorer +{ + spmd_kernel *m_pKernel; + exec_mask m_orig_exec, m_orig_kernel_exec; + + CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec), + m_orig_kernel_exec(pKernel->m_kernel_exec) + { + pKernel->m_kernel_exec = exec_mask::all_on(); + pKernel->m_exec = exec_mask::all_on(); + } + + CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer() + { + m_pKernel->m_kernel_exec = m_pKernel->m_kernel_exec & m_orig_kernel_exec; + m_pKernel->m_exec = m_pKernel->m_exec & m_orig_exec; + m_pKernel->check_masks(); + } +}; + +#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); +#define SPMD_UNMASKED_END } + +#if 0 +template +CPPSPMD_FORCE_INLINE decltype(auto) spmd_kernel::spmd_call(Args&&... 
args) +{ + SPMDKernel kernel; + kernel.init(m_exec); + return kernel._call(std::forward(args)...); +} +#else +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(m_exec); + kernel._call(std::forward(args)...); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if_break(const vbool& cond) +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + exec_mask cond_exec(cond); + + m_exec = andnot(m_exec & cond_exec, m_exec); + + check_masks(); +} + +// No SPMD breaks, continues, etc. allowed +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sif(const vbool& cond, const IfBody& ifBody) +{ + exec_mask im = m_exec & exec_mask(cond); + + if (any(im)) + { + const exec_mask orig_exec = m_exec; + m_exec = im; + ifBody(); + m_exec = orig_exec; + } +} + +// No SPMD breaks, continues, etc. allowed +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sifelse(const vbool& cond, const IfBody& ifBody, const ElseBody &elseBody) +{ + const exec_mask orig_exec = m_exec; + + exec_mask im = m_exec & exec_mask(cond); + + if (any(im)) + { + m_exec = im; + ifBody(); + } + + exec_mask em = orig_exec & exec_mask(!cond); + + if (any(em)) + { + m_exec = em; + elseBody(); + } + + m_exec = orig_exec; +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if(const vbool& cond, const IfBody& ifBody) +{ + exec_mask cond_exec(cond); + + exec_mask pre_if_exec = cond_exec & m_exec; + + if (any(pre_if_exec)) + { + exec_mask unexecuted_lanes = andnot(cond_exec, m_exec); + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. 
+ m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBody& ifBody, const ElseBody& elseBody) +{ + bool all_flag = false; + + exec_mask cond_exec(cond); + + { + exec_mask pre_if_exec = cond_exec & m_exec; + + int mask = pre_if_exec.get_movemask(); + if (mask != 0) + { + all_flag = ((uint32_t)mask == m_exec.get_movemask()); + + exec_mask unexecuted_lanes = andnot(cond_exec, m_exec); + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. + m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } + } + + if (!all_flag) + { + exec_mask pre_if_exec = andnot(cond_exec, m_exec); + + if (any(pre_if_exec)) + { + exec_mask unexecuted_lanes = cond_exec & m_exec; + m_exec = pre_if_exec; + + // 11/22/2025: changed to elseBody() here, simple bug, we use the macro variants of ifelse anyway + elseBody(); + + // Propagate any lanes that got disabled inside the else body into the exec mask outside the else body, but turn on any lanes that didn't execute inside the else body. 
+ m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } + } +} + +struct scoped_exec_restorer +{ + exec_mask *m_pMask; + exec_mask m_prev_mask; + CPPSPMD_FORCE_INLINE scoped_exec_restorer(exec_mask *pExec_mask) : m_pMask(pExec_mask), m_prev_mask(*pExec_mask) { } + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer() { *m_pMask = m_prev_mask; } +}; + +// Cannot use SPMD break, continue, or return inside "simple" if/else +#define SPMD_SIF(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SELSE(cond) } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SENDIF } + +// Same as SPMD_SIF, except doesn't use a scoped object +#define SPMD_SIF2(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { exec_mask _orig_exec = m_exec; m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SELSE2(cond) m_exec = _orig_exec; } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { exec_mask _orig_exec = m_exec; m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SEND_IF2 m_exec = _orig_exec; } + +// Same as SPMD_SIF(), except the if/else blocks are always executed +#define SPMD_SAIF(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \ + m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SAELSE(cond) } exec_mask 
CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \ + m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SAENDIF } + +// Cannot use SPMD break, continue, or return inside sselect +#define SPMD_SSELECT(var) do { vint_t _select_var = var; scoped_exec_restorer _orig_exec(&m_exec); exec_mask _select_executed(exec_mask::all_off()); +#define SPMD_SCASE(value) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); _select_executed = _select_executed | m_exec; + +//#define SPMD_SCASE_END if (_select_executed.get_movemask() == _orig_exec.m_prev_mask.get_movemask()) break; } +#define SPMD_SCASE_END if (!any(_select_executed ^ _orig_exec.m_prev_mask)) break; } +#define SPMD_SDEFAULT exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); if (any(_all_other_lanes)) { m_exec = _all_other_lanes; +#define SPMD_SDEFAULT_END } +#define SPMD_SSELECT_END } while(0); + +// Same as SPMD_SSELECT, except all cases are executed. 
+// Cannot use SPMD break, continue, or return inside sselect +#define SPMD_SASELECT(var) do { vint_t _select_var = var; scoped_exec_restorer _orig_exec(&m_exec); exec_mask _select_executed(exec_mask::all_off()); + +#define SPMD_SACASE(value) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); { m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); \ + _select_executed = _select_executed | m_exec; + +#define SPMD_SACASE_END } +#define SPMD_SADEFAULT exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); { m_exec = _all_other_lanes; +#define SPMD_SADEFAULT_END } +#define SPMD_SASELECT_END } while(0); + +struct scoped_exec_restorer2 +{ + spmd_kernel *m_pKernel; + exec_mask m_unexecuted_lanes; + + CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : + m_pKernel(pKernel) + { + exec_mask cond_exec(cond); + m_unexecuted_lanes = andnot(cond_exec, pKernel->m_exec); + pKernel->m_exec = cond_exec & pKernel->m_exec; + } + + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() + { + m_pKernel->m_exec = m_pKernel->m_exec | m_unexecuted_lanes; + m_pKernel->check_masks(); + } +}; + +#define SPMD_IF(cond) { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); if (any(m_exec)) { +#define SPMD_ELSE(cond) } } { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); if (any(m_exec)) { +#define SPMD_END_IF } } + +// Same as SPMD_IF, except the conditional block is always executed. 
+#define SPMD_AIF(cond) { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); { +#define SPMD_AELSE(cond) } } { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); { +#define SPMD_AEND_IF } } + +class scoped_exec_saver +{ + exec_mask m_exec, m_kernel_exec, m_continue_mask; + spmd_kernel *m_pKernel; +#ifdef _DEBUG + bool m_in_loop; +#endif + +public: + inline scoped_exec_saver(spmd_kernel *pKernel) : + m_exec(pKernel->m_exec), m_kernel_exec(pKernel->m_kernel_exec), m_continue_mask(pKernel->m_continue_mask), + m_pKernel(pKernel) + { +#ifdef _DEBUG + m_in_loop = pKernel->m_in_loop; +#endif + } + + inline ~scoped_exec_saver() + { + m_pKernel->m_exec = m_exec; + m_pKernel->m_continue_mask = m_continue_mask; + m_pKernel->m_kernel_exec = m_kernel_exec; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +#define SPMD_BEGIN_CALL scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); m_continue_mask = exec_mask::all_off(); +#define SPMD_BEGIN_CALL_ALL_LANES scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); m_exec = exec_mask::all_on(); m_continue_mask = exec_mask::all_off(); + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const ForeachBody& foreachBody) +{ + if (begin == end) + return; + + if (!any(m_exec)) + return; + + // We don't support iterating backwards. + if (begin > end) + std::swap(begin, end); + + exec_mask prev_continue_mask = m_continue_mask, prev_exec = m_exec; + + int total_full = (end - begin) / PROGRAM_COUNT; + int total_partial = (end - begin) % PROGRAM_COUNT; + + lint_t loop_index = begin + program_index; + + const int total_loops = total_full + (total_partial ? 
1 : 0); + + m_continue_mask = exec_mask::all_off(); + + for (int i = 0; i < total_loops; i++) + { + int n = PROGRAM_COUNT; + if ((i == (total_loops - 1)) && (total_partial)) + { + exec_mask partial_mask = exec_mask(vint_t(total_partial) > vint_t(program_index)); + m_exec = m_exec & partial_mask; + n = total_partial; + } + + foreachBody(loop_index, n); + + m_exec = m_exec | m_continue_mask; + if (!any(m_exec)) + break; + + m_continue_mask = exec_mask::all_off(); + check_masks(); + + store_all(loop_index, loop_index + PROGRAM_COUNT); + } + + m_exec = prev_exec & m_kernel_exec; + m_continue_mask = prev_continue_mask; + check_masks(); +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_while(const WhileCondBody& whileCondBody, const WhileBody& whileBody) +{ + exec_mask orig_exec = m_exec; + + exec_mask orig_continue_mask = m_continue_mask; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + const bool prev_in_loop = m_in_loop; + m_in_loop = true; +#endif + + while(true) + { + exec_mask cond_exec = exec_mask(whileCondBody()); + m_exec = m_exec & cond_exec; + + if (!any(m_exec)) + break; + + whileBody(); + + m_exec = m_exec | m_continue_mask; + m_continue_mask = exec_mask::all_off(); + check_masks(); + } + +#ifdef _DEBUG + m_in_loop = prev_in_loop; +#endif + + m_exec = orig_exec & m_kernel_exec; + m_continue_mask = orig_continue_mask; + check_masks(); +} + +struct scoped_while_restorer +{ + spmd_kernel *m_pKernel; + exec_mask m_orig_exec, m_orig_continue_mask; +#ifdef _DEBUG + bool m_prev_in_loop; +#endif + + CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec), + m_orig_continue_mask(pKernel->m_continue_mask) + { + pKernel->m_continue_mask.all_off(); + +#ifdef _DEBUG + m_prev_in_loop = pKernel->m_in_loop; + pKernel->m_in_loop = true; +#endif + } + + CPPSPMD_FORCE_INLINE ~scoped_while_restorer() + { + m_pKernel->m_exec = m_orig_exec & m_pKernel->m_kernel_exec; + m_pKernel->m_continue_mask 
= m_orig_continue_mask; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_prev_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +#undef SPMD_WHILE +#undef SPMD_WEND +#define SPMD_WHILE(cond) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); \ + m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; + +#define SPMD_WEND m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); } } + +// Nesting is not supported (although it will compile, but the results won't make much sense). +#define SPMD_FOREACH(loop_var, bi, ei) if (((bi) != (ei)) && (any(m_exec))) { \ + scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + uint32_t b = (uint32_t)(bi), e = (uint32_t)(ei); if ((b) > (e)) { std::swap(b, e); } const uint32_t total_full = ((e) - (b)) >> PROGRAM_COUNT_SHIFT, total_partial = ((e) - (b)) & (PROGRAM_COUNT - 1); \ + lint_t loop_var = program_index + (int)b; const uint32_t total_loops = total_full + (total_partial ? 
1U : 0U); \ + for (uint32_t CPPSPMD_GLUER2(_foreach_counter, __LINE__) = 0; CPPSPMD_GLUER2(_foreach_counter, __LINE__) < total_loops; ++CPPSPMD_GLUER2(_foreach_counter, __LINE__)) { \ + if ((CPPSPMD_GLUER2(_foreach_counter, __LINE__) == (total_loops - 1)) && (total_partial)) { exec_mask partial_mask = exec_mask(vint_t((int)total_partial) > vint_t(program_index)); m_exec = m_exec & partial_mask; } + +#define SPMD_FOREACH_END(loop_var) m_exec = m_exec | m_continue_mask; if (!any(m_exec)) break; m_continue_mask = exec_mask::all_off(); check_masks(); store_all(loop_var, loop_var + PROGRAM_COUNT); } } + +// Okay to use spmd_continue or spmd_return, but not spmd_break +#define SPMD_FOREACH_ACTIVE(index_var) int64_t index_var; { uint64_t _movemask = m_exec.get_movemask(); if (_movemask) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + for (uint32_t _i = 0; _i < PROGRAM_COUNT; ++_i) { \ + if (_movemask & (1U << _i)) { \ + m_exec.enable_lane(_i); m_exec = m_exec & m_kernel_exec; \ + (index_var) = _i; \ + +#define SPMD_FOREACH_ACTIVE_END } } } } + +// Okay to use spmd_continue, but not spmd_break/spmd_continue +#define SPMD_FOREACH_UNIQUE_INT(index_var, var) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + CPPSPMD_DECL(int_t, _vals[PROGRAM_COUNT]); store_linear_all(_vals, var); std::sort(_vals, _vals + PROGRAM_COUNT); \ + const int _n = (int)(std::unique(_vals, _vals + PROGRAM_COUNT) - _vals); \ + for (int _i = 0; _i < _n; ++_i) { int index_var = _vals[_i]; vbool cond = (vint_t(var) == vint_t(index_var)); m_exec = exec_mask(cond); + +#define SPMD_FOREACH_UNIQUE_INT_END } } + +struct scoped_simple_while_restorer +{ + spmd_kernel* m_pKernel; + exec_mask m_orig_exec; +#ifdef _DEBUG + bool m_prev_in_loop; +#endif + + CPPSPMD_FORCE_INLINE scoped_simple_while_restorer(spmd_kernel* pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec) + { + +#ifdef _DEBUG + m_prev_in_loop = pKernel->m_in_loop; + pKernel->m_in_loop = 
true; +#endif + } + + CPPSPMD_FORCE_INLINE ~scoped_simple_while_restorer() + { + m_pKernel->m_exec = m_orig_exec; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_prev_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +// Cannot use SPMD break, continue, or return inside simple while + +#define SPMD_SWHILE(cond) { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + while(true) { \ + exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; +#define SPMD_SWEND } } + +// Cannot use SPMD break, continue, or return inside simple do +#define SPMD_SDO { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { +#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; } } + +#undef SPMD_FOR +#undef SPMD_END_FOR +#define SPMD_FOR(for_init, for_cond) { for_init; scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(for_cond)); \ + m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; +#define SPMD_END_FOR(for_inc) m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); for_inc; } } + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody) +{ + exec_mask orig_exec = m_exec; + + forInitBody(); + + exec_mask orig_continue_mask = m_continue_mask; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + const bool prev_in_loop = m_in_loop; + m_in_loop = true; +#endif + + while(true) + { + exec_mask cond_exec = exec_mask(forCondBody()); + m_exec = m_exec & cond_exec; + + if (!any(m_exec)) + break; + + forBody(); + + m_exec = 
m_exec | m_continue_mask; + m_continue_mask = exec_mask::all_off(); + check_masks(); + + forIncrBody(); + } + + m_exec = orig_exec & m_kernel_exec; + m_continue_mask = orig_continue_mask; + +#ifdef _DEBUG + m_in_loop = prev_in_loop; + check_masks(); +#endif +} diff --git a/Source/ThirdParty/basis_universal/encoder/cppspmd_math.h b/Source/ThirdParty/basis_universal/encoder/cppspmd_math.h new file mode 100644 index 000000000..404032409 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/cppspmd_math.h @@ -0,0 +1,725 @@ +// Do not include this header directly. +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The general goal of these vectorized estimated math functions is scalability/performance. +// There are explictly no checks NaN's/Inf's on the input arguments. There are no assertions either. +// These are fast estimate functions - if you need more than that, use stdlib. Please do a proper +// engineering analysis before relying on them. +// I have chosen functions written by others, ported them to CppSPMD, then measured their abs/rel errors. +// I compared each to the ones in DirectXMath and stdlib's for accuracy/performance. 
+ +CPPSPMD_FORCE_INLINE vfloat fmod_inv(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + vfloat c = frac(abs(a * b_inv)) * abs(b); + return spmd_ternaryf(a < 0, -c, c); +} + +CPPSPMD_FORCE_INLINE vfloat fmod_inv_p(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + return frac(a * b_inv) * b; +} + +// Avoids dividing by zero or very small values. +CPPSPMD_FORCE_INLINE vfloat safe_div(vfloat a, vfloat b, float fDivThresh = 1e-7f) +{ + return a / spmd_ternaryf( abs(b) > fDivThresh, b, spmd_ternaryf(b < 0.0f, -fDivThresh, fDivThresh) ); +} + +/* + clang 9.0.0 for win /fp:precise release + f range: 0.0000000000001250 10000000000.0000000000000000, vals: 1073741824 + + log2_est(): + max abs err: 0.0000023076808731 + max rel err: 0.0000000756678881 + avg abs err: 0.0000007535452724 + avg rel err: 0.0000000235117843 + + XMVectorLog2(): + max abs err: 0.0000023329709933 + max rel err: 0.0000000826961046 + avg abs err: 0.0000007564889684 + avg rel err: 0.0000000236051899 + + std::log2f(): + max abs err: 0.0000020265979401 + max rel err: 0.0000000626647654 + avg abs err: 0.0000007494445227 + avg rel err: 0.0000000233800985 +*/ + +// See https://tech.ebayinc.com/engineering/fast-approximate-logarithms-part-iii-the-formulas/ +inline vfloat spmd_kernel::log2_est(vfloat v) +{ + vfloat signif, fexp; + + // Just clamp to a very small value, instead of checking for invalid inputs. + vfloat x = max(v, 2.2e-38f); + + /* + * Assume IEEE representation, which is sgn(1):exp(8):frac(23) + * representing (1+frac)*2^(exp-127). Call 1+frac the significand + */ + + // get exponent + vint ux1_i = cast_vfloat_to_vint(x); + + vint exp = VUINT_SHIFT_RIGHT(ux1_i & 0x7F800000, 23); + + // actual exponent is exp-127, will subtract 127 later + + vint ux2_i; + vfloat ux2_f; + + vint greater = ux1_i & 0x00400000; // true if signif > 1.5 + SPMD_SIF(greater != 0) + { + // signif >= 1.5 so need to divide by 2. 
Accomplish this by stuffing exp = 126 which corresponds to an exponent of -1 + store_all(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f000000); + + store_all(ux2_f, cast_vint_to_vfloat(ux2_i)); + + // 126 instead of 127 compensates for division by 2 + store_all(fexp, vfloat(exp - 126)); + } + SPMD_SELSE(greater != 0) + { + // get signif by stuffing exp = 127 which corresponds to an exponent of 0 + store(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f800000); + + store(ux2_f, cast_vint_to_vfloat(ux2_i)); + + store(fexp, vfloat(exp - 127)); + } + SPMD_SENDIF + + store_all(signif, ux2_f); + store_all(signif, signif - 1.0f); + + const float a = 0.1501692f, b = 3.4226132f, c = 5.0225057f, d = 4.1130283f, e = 3.4813372f; + + vfloat xm1 = signif; + vfloat xm1sqr = xm1 * xm1; + + return fexp + ((a * (xm1sqr * xm1) + b * xm1sqr + c * xm1) / (xm1sqr + d * xm1 + e)); + + // fma lowers accuracy for SSE4.1 - no idea why (compiler reordering?) + //return fexp + ((vfma(a, (xm1sqr * xm1), vfma(b, xm1sqr, c * xm1))) / (xm1sqr + vfma(d, xm1, e))); +} + +// Uses log2_est(), so this function must be <= the precision of that. +inline vfloat spmd_kernel::log_est(vfloat v) +{ + return log2_est(v) * 0.693147181f; +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_a, vint& adjustment) +{ + // Assume we're using equation (2) + store_all(adjustment, 0); + + // integer part of the input argument + vint int_arg = (vint)arg; + + // if frac(arg) is in [0.5, 1.0]... + SPMD_SIF((arg - int_arg) > 0.5f) + { + store(adjustment, 1); + + // then change it to [0.0, 0.5] + store(arg, arg - 0.5f); + } + SPMD_SENDIF + + // arg == just the fractional part + store_all(arg, arg - (vfloat)int_arg); + + // Now compute 2** (int) arg. 
+ store_all(int_arg, min(int_arg + 127, 254)); + + store_all(two_int_a, cast_vint_to_vfloat(VINT_SHIFT_LEFT(int_arg, 23))); +} + +/* + clang 9.0.0 for win /fp:precise release + f range : -50.0000000000000000 49.9999940395355225, vals : 16777216 + + exp2_est(): + Total passed near - zero check : 16777216 + Total sign diffs : 0 + max abs err: 1668910609.7500000000000000 + max rel err: 0.0000015642030031 + avg abs err: 10793794.4007573910057545 + avg rel err: 0.0000003890893282 + + XMVectorExp2(): + Total passed near-zero check: 16777216 + Total sign diffs: 0 + max abs err: 1665552836.8750000000000000 + max rel err: 0.0000114674862370 + avg abs err: 10771868.2627860084176064 + avg rel err: 0.0000011218880770 + + std::exp2f(): + Total passed near-zero check: 16777216 + Total sign diffs: 0 + max abs err: 1591636585.6250000000000000 + max rel err: 0.0000014849731018 + avg abs err: 10775800.3204844966530800 + avg rel err: 0.0000003851496422 +*/ + +// http://www.ganssle.com/item/approximations-c-code-exponentiation-log.htm +inline vfloat spmd_kernel::exp2_est(vfloat arg) +{ + SPMD_BEGIN_CALL + + const vfloat P00 = +7.2152891521493f; + const vfloat P01 = +0.0576900723731f; + const vfloat Q00 = +20.8189237930062f; + const vfloat Q01 = +1.0f; + const vfloat sqrt2 = 1.4142135623730950488f; // sqrt(2) for scaling + + vfloat result = 0.0f; + + // Return 0 if arg is too large. + // We're not introducing inf/nan's into calculations, or risk doing so by returning huge default values. + SPMD_IF(abs(arg) > 126.0f) + { + spmd_return(); + } + SPMD_END_IF + + // 2**(int(a)) + vfloat two_int_a; + + // set to 1 by reduce_expb + vint adjustment; + + // 0 if arg is +; 1 if negative + vint negative = 0; + + // If the input is negative, invert it. At the end we'll take the reciprocal, since n**(-1) = 1/(n**x). 
+ SPMD_SIF(arg < 0.0f) + { + store(arg, -arg); + store(negative, 1); + } + SPMD_SENDIF + + store_all(arg, min(arg, 126.0f)); + + // reduce to [0.0, 0.5] + reduce_expb(arg, two_int_a, adjustment); + + // The format of the polynomial is: + // answer=(Q(x**2) + x*P(x**2))/(Q(x**2) - x*P(x**2)) + // + // The following computes the polynomial in several steps: + + // Q(x**2) + vfloat Q = vfma(Q01, (arg * arg), Q00); + + // x*P(x**2) + vfloat x_P = arg * (vfma(P01, arg * arg, P00)); + + vfloat answer = (Q + x_P) / (Q - x_P); + + // Now correct for the scaling factor of 2**(int(a)) + store_all(answer, answer * two_int_a); + + // If the result had a fractional part > 0.5, correct for that + store_all(answer, spmd_ternaryf(adjustment != 0, answer * sqrt2, answer)); + + // Correct for a negative input + SPMD_SIF(negative != 0) + { + store(answer, 1.0f / answer); + } + SPMD_SENDIF + + store(result, answer); + + return result; +} + +inline vfloat spmd_kernel::exp_est(vfloat arg) +{ + // e^x = exp2(x / log_base_e(2)) + // constant is 1.0/(log(2)/log(e)) or 1/log(2) + return exp2_est(arg * 1.44269504f); +} + +inline vfloat spmd_kernel::pow_est(vfloat arg1, vfloat arg2) +{ + return exp_est(log_est(arg1) * arg2); +} + +/* + clang 9.0.0 for win /fp:precise release + Total near-zero: 144, output above near-zero tresh: 30 + Total near-zero avg: 0.0000067941016621 max: 0.0000134706497192 + Total near-zero sign diffs: 5 + Total passed near-zero check: 16777072 + Total sign diffs: 5 + max abs err: 0.0000031375306036 + max rel err: 0.1140846017075028 + avg abs err: 0.0000003026226621 + avg rel err: 0.0000033564977623 +*/ + +// Math from this web page: http://developer.download.nvidia.com/cg/sin.html +// This is ~2x slower than sin_est() or cos_est(), and less accurate, but I'm keeping it here for comparison purposes to help validate/sanity check sin_est() and cos_est(). 
+inline vfloat spmd_kernel::sincos_est_a(vfloat a, bool sin_flag) +{ + const float c0_x = 0.0f, c0_y = 0.5f, c0_z = 1.0f; + const float c1_x = 0.25f, c1_y = -9.0f, c1_z = 0.75f, c1_w = 0.159154943091f; + const float c2_x = 24.9808039603f, c2_y = -24.9808039603f, c2_z = -60.1458091736f, c2_w = 60.1458091736f; + const float c3_x = 85.4537887573f, c3_y = -85.4537887573f, c3_z = -64.9393539429f, c3_w = 64.9393539429f; + const float c4_x = 19.7392082214f, c4_y = -19.7392082214f, c4_z = -1.0f, c4_w = 1.0f; + + vfloat r0_x, r0_y, r0_z, r1_x, r1_y, r1_z, r2_x, r2_y, r2_z; + + store_all(r1_x, sin_flag ? vfms(c1_w, a, c1_x) : c1_w * a); + + store_all(r1_y, frac(r1_x)); + + store_all(r2_x, (vfloat)(r1_y < c1_x)); + + store_all(r2_y, (vfloat)(r1_y >= c1_y)); + store_all(r2_z, (vfloat)(r1_y >= c1_z)); + + store_all(r2_y, vfma(r2_x, c4_z, vfma(r2_y, c4_w, r2_z * c4_z))); + + store_all(r0_x, c0_x - r1_y); + store_all(r0_y, c0_y - r1_y); + store_all(r0_z, c0_z - r1_y); + + store_all(r0_x, r0_x * r0_x); + store_all(r0_y, r0_y * r0_y); + store_all(r0_z, r0_z * r0_z); + + store_all(r1_x, vfma(c2_x, r0_x, c2_z)); + store_all(r1_y, vfma(c2_y, r0_y, c2_w)); + store_all(r1_z, vfma(c2_x, r0_z, c2_z)); + + store_all(r1_x, vfma(r1_x, r0_x, c3_x)); + store_all(r1_y, vfma(r1_y, r0_y, c3_y)); + store_all(r1_z, vfma(r1_z, r0_z, c3_x)); + + store_all(r1_x, vfma(r1_x, r0_x, c3_z)); + store_all(r1_y, vfma(r1_y, r0_y, c3_w)); + store_all(r1_z, vfma(r1_z, r0_z, c3_z)); + + store_all(r1_x, vfma(r1_x, r0_x, c4_x)); + store_all(r1_y, vfma(r1_y, r0_y, c4_y)); + store_all(r1_z, vfma(r1_z, r0_z, c4_x)); + + store_all(r1_x, vfma(r1_x, r0_x, c4_z)); + store_all(r1_y, vfma(r1_y, r0_y, c4_w)); + store_all(r1_z, vfma(r1_z, r0_z, c4_z)); + + store_all(r0_x, vfnma(r1_x, r2_x, vfnma(r1_y, r2_y, r1_z * -r2_z))); + + return r0_x; +} + +// positive values only +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::recip_est1(const vfloat& q) +{ + //const int mag = 0x7EF312AC; // 2 NR iters, 3 is 0x7EEEEBB3 + const int mag = 
0x7EF311C3; + const float fMinThresh = .0000125f; + + vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag))); + + vint x_l = vint(mag) - cast_vfloat_to_vint(l); + + vfloat rcp_l = cast_vint_to_vfloat(x_l); + + return rcp_l * vfnma(rcp_l, q, 2.0f); +} + +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::recip_est1_pn(const vfloat& t) +{ + //const int mag = 0x7EF312AC; // 2 NR iters, 3 is 0x7EEEEBB3 + const int mag = 0x7EF311C3; + const float fMinThresh = .0000125f; + + vfloat s = sign(t); + vfloat q = abs(t); + + vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag))); + + vint x_l = vint(mag) - cast_vfloat_to_vint(l); + + vfloat rcp_l = cast_vint_to_vfloat(x_l); + + return rcp_l * vfnma(rcp_l, q, 2.0f) * s; +} + +// https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf +// https://github.com/hcs0/Hackers-Delight/blob/master/rsqrt.c.txt +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::rsqrt_est1(vfloat x0) +{ + vfloat xhalf = 0.5f * x0; + vfloat x = cast_vint_to_vfloat(vint(0x5F375A82) - (VINT_SHIFT_RIGHT(cast_vfloat_to_vint(x0), 1))); + return x * vfnma(xhalf * x, x, 1.5008909f); +} + +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::rsqrt_est2(vfloat x0) +{ + vfloat xhalf = 0.5f * x0; + vfloat x = cast_vint_to_vfloat(vint(0x5F37599E) - (VINT_SHIFT_RIGHT(cast_vfloat_to_vint(x0), 1))); + vfloat x1 = x * vfnma(xhalf * x, x, 1.5); + vfloat x2 = x1 * vfnma(xhalf * x1, x1, 1.5); + return x2; +} + +// Math from: http://developer.download.nvidia.com/cg/atan2.html +// TODO: Needs more validation, parameter checking. 
+CPPSPMD_FORCE_INLINE vfloat spmd_kernel::atan2_est(vfloat y, vfloat x) +{ + vfloat t1 = abs(y); + vfloat t3 = abs(x); + + vfloat t0 = max(t3, t1); + store_all(t1, min(t3, t1)); + + store_all(t3, t1 / t0); + + vfloat t4 = t3 * t3; + store_all(t0, vfma(-0.013480470f, t4, 0.057477314f)); + store_all(t0, vfms(t0, t4, 0.121239071f)); + store_all(t0, vfma(t0, t4, 0.195635925f)); + store_all(t0, vfms(t0, t4, 0.332994597f)); + store_all(t0, vfma(t0, t4, 0.999995630f)); + store_all(t3, t0 * t3); + + store_all(t3, spmd_ternaryf(abs(y) > abs(x), vfloat(1.570796327f) - t3, t3)); + + store_all(t3, spmd_ternaryf(x < 0.0f, vfloat(3.141592654f) - t3, t3)); + store_all(t3, spmd_ternaryf(y < 0.0f, -t3, t3)); + + return t3; +} + +/* + clang 9.0.0 for win /fp:precise release + Tested range: -25.1327412287183449 25.1327382326621169, vals : 16777216 + Skipped angles near 90/270 within +- .001 radians. + Near-zero threshold: .0000125f + Near-zero output above check threshold: 1e-6f + + Total near-zero: 144, output above near-zero tresh: 20 + Total near-zero avg: 0.0000067510751968 max: 0.0000133514404297 + Total near-zero sign diffs: 5 + Total passed near-zero check: 16766400 + Total sign diffs: 5 + max abs err: 1.4982600811139264 + max rel err: 0.1459155900188041 + avg rel err: 0.0000054659502568 + + XMVectorTan() precise: + Total near-zero: 144, output above near-zero tresh: 18 + Total near-zero avg: 0.0000067641216186 max: 0.0000133524126795 + Total near-zero sign diffs: 0 + Total passed near-zero check: 16766400 + Total sign diffs: 0 + max abs err: 1.9883573246424930 + max rel err: 0.1459724171926864 + avg rel err: 0.0000054965766843 + + std::tanf(): + Total near-zero: 144, output above near-zero tresh: 0 + Total near-zero avg: 0.0000067116930779 max: 0.0000127713074107 + Total near-zero sign diffs: 11 + Total passed near-zero check: 16766400 + Total sign diffs: 11 + max abs err: 0.8989131818294709 + max rel err: 0.0573181403173166 + avg rel err: 0.0000030791301203 + + Originally 
from: + http://www.ganssle.com/approx.htm +*/ + +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::tan82(vfloat x) +{ + // Original double version was 8.2 digits + //double c1 = 211.849369664121f, c2 = -12.5288887278448f, c3 = 269.7350131214121f, c4 = -71.4145309347748f; + // Tuned float constants for lower avg rel error (without using FMA3): + const float c1 = 211.849350f, c2 = -12.5288887f, c3 = 269.734985f, c4 = -71.4145203f; + vfloat x2 = x * x; + return (x * (vfma(c2, x2, c1)) / (vfma(x2, (c4 + x2), c3))); +} + +// Don't call this for angles close to 90/270!. +inline vfloat spmd_kernel::tan_est(vfloat x) +{ + const float fPi = 3.141592653589793f, fOneOverPi = 0.3183098861837907f; + CPPSPMD_DECL(const uint8_t, s_table0[16]) = { 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4 }; + + vint table = init_lookup4(s_table0); // a load + vint sgn = cast_vfloat_to_vint(x) & 0x80000000; + + store_all(x, abs(x)); + vfloat orig_x = x; + + vfloat q = x * fOneOverPi; + store_all(x, q - floor(q)); + + vfloat x4 = x * 4.0f; + vint octant = (vint)(x4); + + vfloat x0 = spmd_ternaryf((octant & 1) != 0, -x4, x4); + + vint k = table_lookup4_8(octant, table) & 0xFF; // a shuffle + + vfloat bias = (vfloat)k + -128.0f; + vfloat y = x0 + bias; + + vfloat z = tan82(y); + + vfloat r; + + vbool octant_one_or_two = (octant == 1) || (octant == 2); + + // SPMD optimization - skip costly divide if we can + if (spmd_any(octant_one_or_two)) + { + const float fDivThresh = .4371e-7f; + vfloat one_over_z = 1.0f / spmd_ternaryf(abs(z) > fDivThresh, z, spmd_ternaryf(z < 0.0f, -fDivThresh, fDivThresh)); + + vfloat b = spmd_ternaryf(octant_one_or_two, one_over_z, z); + store_all(r, spmd_ternaryf((octant & 2) != 0, -b, b)); + } + else + { + store_all(r, spmd_ternaryf(octant == 0, z, -z)); + } + + // Small angle approximation, to decrease the max rel error near Pi. 
+ SPMD_SIF(x >= (1.0f - .0003125f*4.0f)) + { + store(r, vfnma(floor(q) + 1.0f, fPi, orig_x)); + } + SPMD_SENDIF + + return cast_vint_to_vfloat(cast_vfloat_to_vint(r) ^ sgn); +} + +inline void spmd_kernel::seed_rand(rand_context& x, vint seed) +{ + store(x.a, 0xf1ea5eed); + store(x.b, seed ^ 0xd8487b1f); + store(x.c, seed ^ 0xdbadef9a); + store(x.d, seed); + for (int i = 0; i < 20; ++i) + (void)get_randu(x); +} + +// https://burtleburtle.net/bob/rand/smallprng.html +// Returns 32-bit unsigned random numbers. +inline vint spmd_kernel::get_randu(rand_context& x) +{ + vint e = x.a - VINT_ROT(x.b, 27); + store(x.a, x.b ^ VINT_ROT(x.c, 17)); + store(x.b, x.c + x.d); + store(x.c, x.d + e); + store(x.d, e + x.a); + return x.d; +} + +// Returns random numbers between [low, high), or low if low >= high +inline vint spmd_kernel::get_randi(rand_context& x, vint low, vint high) +{ + vint rnd = get_randu(x); + + vint range = high - low; + + vint rnd_range = mulhiu(rnd, range); + + return spmd_ternaryi(low < high, low + rnd_range, low); +} + +// Returns random numbers between [low, high), or low if low >= high +inline vfloat spmd_kernel::get_randf(rand_context& x, vfloat low, vfloat high) +{ + vint rndi = get_randu(x) & 0x7fffff; + + vfloat rnd = (vfloat)(rndi) * (1.0f / 8388608.0f); + + return spmd_ternaryf(low < high, vfma(high - low, rnd, low), low); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::init_reverse_bits(vint& tab1, vint& tab2) +{ + const uint8_t tab1_bytes[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const uint8_t tab2_bytes[16] = { 0, 8 << 4, 4 << 4, 12 << 4, 2 << 4, 10 << 4, 6 << 4, 14 << 4, 1 << 4, 9 << 4, 5 << 4, 13 << 4, 3 << 4, 11 << 4, 7 << 4, 15 << 4 }; + store_all(tab1, init_lookup4(tab1_bytes)); + store_all(tab2, init_lookup4(tab2_bytes)); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::reverse_bits(vint k, vint tab1, vint tab2) +{ + vint r0 = table_lookup4_8(k & 0x7F7F7F7F, tab2); + vint r1 = table_lookup4_8(VUINT_SHIFT_RIGHT(k, 4) & 
0x7F7F7F7F, tab1); + vint r3 = r0 | r1; + return byteswap(r3); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_leading_zeros(vint x) +{ + CPPSPMD_DECL(const uint8_t, s_tab[16]) = { 0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + vint tab = init_lookup4(s_tab); + + //x <= 0x0000ffff + vbool c0 = (x & 0xFFFF0000) == 0; + vint n0 = spmd_ternaryi(c0, 16, 0); + vint x0 = spmd_ternaryi(c0, VINT_SHIFT_LEFT(x, 16), x); + + //x <= 0x00ffffff + vbool c1 = (x0 & 0xFF000000) == 0; + vint n1 = spmd_ternaryi(c1, n0 + 8, n0); + vint x1 = spmd_ternaryi(c1, VINT_SHIFT_LEFT(x0, 8), x0); + + //x <= 0x0fffffff + vbool c2 = (x1 & 0xF0000000) == 0; + vint n2 = spmd_ternaryi(c2, n1 + 4, n1); + vint x2 = spmd_ternaryi(c2, VINT_SHIFT_LEFT(x1, 4), x1); + + return table_lookup4_8(VUINT_SHIFT_RIGHT(x2, 28), tab) + n2; +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_leading_zeros_alt(vint x) +{ + //x <= 0x0000ffff + vbool c0 = (x & 0xFFFF0000) == 0; + vint n0 = spmd_ternaryi(c0, 16, 0); + vint x0 = spmd_ternaryi(c0, VINT_SHIFT_LEFT(x, 16), x); + + //x <= 0x00ffffff + vbool c1 = (x0 & 0xFF000000) == 0; + vint n1 = spmd_ternaryi(c1, n0 + 8, n0); + vint x1 = spmd_ternaryi(c1, VINT_SHIFT_LEFT(x0, 8), x0); + + //x <= 0x0fffffff + vbool c2 = (x1 & 0xF0000000) == 0; + vint n2 = spmd_ternaryi(c2, n1 + 4, n1); + vint x2 = spmd_ternaryi(c2, VINT_SHIFT_LEFT(x1, 4), x1); + + // x <= 0x3fffffff + vbool c3 = (x2 & 0xC0000000) == 0; + vint n3 = spmd_ternaryi(c3, n2 + 2, n2); + vint x3 = spmd_ternaryi(c3, VINT_SHIFT_LEFT(x2, 2), x2); + + // x <= 0x7fffffff + vbool c4 = (x3 & 0x80000000) == 0; + return spmd_ternaryi(c4, n3 + 1, n3); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_trailing_zeros(vint x) +{ + // cast the least significant bit in v to a float + vfloat f = (vfloat)(x & -x); + + // extract exponent and adjust + return VUINT_SHIFT_RIGHT(cast_vfloat_to_vint(f), 23) - 0x7F; +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x) +{ + vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 
0x55555555); + vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); + return VUINT_SHIFT_RIGHT(((v1 + (VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F)) * 0x1010101), 24); +} + +CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b) +{ + return cmpeq_epi16(subs_epu16(a, b), vint(0)); +} + +CPPSPMD_FORCE_INLINE vint cmpge_epu16(const vint &a, const vint &b) +{ + return cmple_epu16(b, a); +} + +CPPSPMD_FORCE_INLINE vint cmpgt_epu16(const vint &a, const vint &b) +{ + return andnot(cmpeq_epi16(a, b), cmple_epu16(b, a)); +} + +CPPSPMD_FORCE_INLINE vint cmplt_epu16(const vint &a, const vint &b) +{ + return cmpgt_epu16(b, a); +} + +CPPSPMD_FORCE_INLINE vint cmpge_epi16(const vint &a, const vint &b) +{ + return cmpeq_epi16(a, b) | cmpgt_epi16(a, b); +} + +CPPSPMD_FORCE_INLINE vint cmple_epi16(const vint &a, const vint &b) +{ + return cmpge_epi16(b, a); +} + +void spmd_kernel::print_vint(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i)); + printf("\n"); +} + +void spmd_kernel::print_vbool(vbool v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i) ? 
1 : 0); + printf("\n"); +} + +void spmd_kernel::print_vint_hex(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("0x%X ", extract(v, i)); + printf("\n"); +} + +void spmd_kernel::print_active_lanes(const char *pPrefix) +{ + CPPSPMD_DECL(int, flags[PROGRAM_COUNT]); + memset(flags, 0, sizeof(flags)); + storeu_linear(flags, vint(1)); + + if (pPrefix) + printf("%s", pPrefix); + + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + { + if (flags[i]) + printf("%u ", i); + } + printf("\n"); +} + +void spmd_kernel::print_vfloat(vfloat v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%f ", extract(v, i)); + printf("\n"); +} diff --git a/Source/ThirdParty/basis_universal/encoder/cppspmd_math_declares.h b/Source/ThirdParty/basis_universal/encoder/cppspmd_math_declares.h new file mode 100644 index 000000000..592e0b954 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/cppspmd_math_declares.h @@ -0,0 +1,88 @@ +// Do not include this header directly. +// This header defines shared struct spmd_kernel helpers. +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// See cppspmd_math.h for detailed error statistics. 
+ +CPPSPMD_FORCE_INLINE void reduce_expb(vfloat& arg, vfloat& two_int_a, vint& adjustment); +CPPSPMD_FORCE_INLINE vfloat tan56(vfloat x); +CPPSPMD_FORCE_INLINE vfloat tan82(vfloat x); + +inline vfloat log2_est(vfloat v); + +inline vfloat log_est(vfloat v); + +inline vfloat exp2_est(vfloat arg); + +inline vfloat exp_est(vfloat arg); + +inline vfloat pow_est(vfloat arg1, vfloat arg2); + +CPPSPMD_FORCE_INLINE vfloat recip_est1(const vfloat& q); +CPPSPMD_FORCE_INLINE vfloat recip_est1_pn(const vfloat& q); + +inline vfloat mod_angles(vfloat a); + +inline vfloat sincos_est_a(vfloat a, bool sin_flag); +CPPSPMD_FORCE_INLINE vfloat sin_est_a(vfloat a) { return sincos_est_a(a, true); } +CPPSPMD_FORCE_INLINE vfloat cos_est_a(vfloat a) { return sincos_est_a(a, false); } + +inline vfloat sin_est(vfloat a); + +inline vfloat cos_est(vfloat a); + +// Don't call with values <= 0. +CPPSPMD_FORCE_INLINE vfloat rsqrt_est1(vfloat x0); + +// Don't call with values <= 0. +CPPSPMD_FORCE_INLINE vfloat rsqrt_est2(vfloat x0); + +CPPSPMD_FORCE_INLINE vfloat atan2_est(vfloat y, vfloat x); + +CPPSPMD_FORCE_INLINE vfloat atan_est(vfloat x) { return atan2_est(x, vfloat(1.0f)); } + +// Don't call this for angles close to 90/270! +inline vfloat tan_est(vfloat x); + +// https://burtleburtle.net/bob/rand/smallprng.html +struct rand_context { vint a, b, c, d; }; + +inline void seed_rand(rand_context& x, vint seed); + +// Returns 32-bit unsigned random numbers. 
+inline vint get_randu(rand_context& x); + +// Returns random numbers between [low, high), or low if low >= high +inline vint get_randi(rand_context& x, vint low, vint high); + +// Returns random numbers between [low, high), or low if low >= high +inline vfloat get_randf(rand_context& x, vfloat low, vfloat high); + +CPPSPMD_FORCE_INLINE void init_reverse_bits(vint& tab1, vint& tab2); +CPPSPMD_FORCE_INLINE vint reverse_bits(vint k, vint tab1, vint tab2); + +CPPSPMD_FORCE_INLINE vint count_leading_zeros(vint x); +CPPSPMD_FORCE_INLINE vint count_leading_zeros_alt(vint x); + +CPPSPMD_FORCE_INLINE vint count_trailing_zeros(vint x); + +CPPSPMD_FORCE_INLINE vint count_set_bits(vint x); + +void print_vint(vint v); +void print_vbool(vbool v); +void print_vint_hex(vint v); +void print_active_lanes(const char *pPrefix); +void print_vfloat(vfloat v); diff --git a/Source/ThirdParty/basis_universal/encoder/cppspmd_sse.h b/Source/ThirdParty/basis_universal/encoder/cppspmd_sse.h new file mode 100644 index 000000000..339e3c4aa --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/cppspmd_sse.h @@ -0,0 +1,2104 @@ +// cppspmd_sse.h +// Copyright 2020-2022 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Notes for Basis Universal: +// All of the "cppspmd" code and headers are OPTIONAL to Basis Universal. if BASISU_SUPPORT_SSE is 0, it will never be included and does not impact compilation. 
+// The techniques used in this code were originally demonstrated for AVX2 by Nicolas Guillemot, Jefferson Amstutz in their "CppSPMD" project. +// This is new code for use in Basis Universal, although it uses the same general SPMD techniques in SSE 2/4. + +#include +#include +#include +#include +#include +#include + +#if CPPSPMD_SSE2 +#include // SSE +#include // SSE2 +#else +#include // SSE +#include // SSE2 +#include // SSE3 +#include // SSSE3 +#include // SSE4.1 +//#include // SSE4.2 +#endif + +#undef CPPSPMD_SSE +#undef CPPSPMD_AVX1 +#undef CPPSPMD_AVX2 +#undef CPPSPMD_AVX +#undef CPPSPMD_FLOAT4 +#undef CPPSPMD_INT16 + +#define CPPSPMD_SSE 1 +#define CPPSPMD_AVX 0 +#define CPPSPMD_AVX1 0 +#define CPPSPMD_AVX2 0 +#define CPPSPMD_FLOAT4 0 +#define CPPSPMD_INT16 0 + +#ifdef _MSC_VER + #ifndef CPPSPMD_DECL + #define CPPSPMD_DECL(type, name) __declspec(align(16)) type name + #endif + + #ifndef CPPSPMD_ALIGN + #define CPPSPMD_ALIGN(v) __declspec(align(v)) + #endif + + #define _mm_undefined_si128 _mm_setzero_si128 + #define _mm_undefined_ps _mm_setzero_ps +#else + #ifndef CPPSPMD_DECL + #define CPPSPMD_DECL(type, name) type name __attribute__((aligned(32))) + #endif + + #ifndef CPPSPMD_ALIGN + #define CPPSPMD_ALIGN(v) __attribute__((aligned(v))) + #endif +#endif + +#ifndef CPPSPMD_FORCE_INLINE +#ifdef _DEBUG +#define CPPSPMD_FORCE_INLINE inline +#else + #ifdef _MSC_VER + #define CPPSPMD_FORCE_INLINE __forceinline + #else + #define CPPSPMD_FORCE_INLINE inline + #endif +#endif +#endif + +#undef CPPSPMD +#undef CPPSPMD_ARCH + +#if CPPSPMD_SSE2 + #define CPPSPMD_SSE41 0 + #define CPPSPMD cppspmd_sse2 + #define CPPSPMD_ARCH _sse2 +#else + #define CPPSPMD_SSE41 1 + #define CPPSPMD cppspmd_sse41 + #define CPPSPMD_ARCH _sse41 +#endif + +#ifndef CPPSPMD_GLUER + #define CPPSPMD_GLUER(a, b) a##b +#endif + +#ifndef CPPSPMD_GLUER2 + #define CPPSPMD_GLUER2(a, b) CPPSPMD_GLUER(a, b) +#endif + +#ifndef CPPSPMD_NAME +#define CPPSPMD_NAME(a) CPPSPMD_GLUER2(a, CPPSPMD_ARCH) +#endif + 
// ----------------------------------------------------------------------------------------------
// CppSPMD kernel support: 4-lane SSE implementation.
// Paths guarded by CPPSPMD_SSE2 emulate newer intrinsics with SSE2 only; the #else paths use
// SSE4.1/SSSE3 intrinsics directly. A "varying" value holds one 32-bit element per program lane,
// and exec_mask holds per-lane all-ones (active) or zero (inactive) words.
// ----------------------------------------------------------------------------------------------

#undef VASSERT
// VCOND(cond) is true only when 'cond' holds on every currently active lane of m_exec.
#define VCOND(cond) ((exec_mask(vbool(cond)) & m_exec).get_movemask() == m_exec.get_movemask())
#define VASSERT(cond) assert( VCOND(cond) )

#define CPPSPMD_ALIGNMENT (16)

// Store the low 32 bits of an __m128i to a (possibly unaligned) address.
#define storeu_si32(p, a) (void)(*(int*)(p) = _mm_cvtsi128_si32((a)))

namespace CPPSPMD
{

// 4 lanes per SPMD program instance (one SSE register of 32-bit values).
const int PROGRAM_COUNT_SHIFT = 2;
const int PROGRAM_COUNT = 1 << PROGRAM_COUNT_SHIFT;

// Construct/destroy an object in 64-byte aligned storage (paired helpers).
template<typename N> inline N* aligned_new() { void* p = _mm_malloc(sizeof(N), 64); new (p) N; return static_cast<N*>(p); }
template<typename N> void aligned_delete(N* p) { if (p) { p->~N(); _mm_free(p); } }

CPPSPMD_DECL(const uint32_t, g_allones_128[4]) = { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX };
CPPSPMD_DECL(const uint32_t, g_x_128[4]) = { UINT32_MAX, 0, 0, 0 };
CPPSPMD_DECL(const float, g_onef_128[4]) = { 1.0f, 1.0f, 1.0f, 1.0f };
CPPSPMD_DECL(const uint32_t, g_oneu_128[4]) = { 1, 1, 1, 1 };

// Single-lane enable masks, indexed by lane number.
CPPSPMD_DECL(const uint32_t, g_lane_masks_128[4][4]) =
{
	{ UINT32_MAX, 0, 0, 0 },
	{ 0, UINT32_MAX, 0, 0 },
	{ 0, 0, UINT32_MAX, 0 },
	{ 0, 0, 0, UINT32_MAX },
};

#if CPPSPMD_SSE41
// 32-bit blend built on the SSE4.1 float blend (operates on sign bits of each 32-bit word).
CPPSPMD_FORCE_INLINE __m128i _mm_blendv_epi32(__m128i a, __m128i b, __m128i c) { return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(c))); }
#endif

// Byte blend: lanes of 'mask' select b over a. SSE2 path assumes a full (all-ones/zero) byte mask.
CPPSPMD_FORCE_INLINE __m128i blendv_epi8(__m128i a, __m128i b, __m128i mask)
{
#if CPPSPMD_SSE2
	return _mm_castps_si128(_mm_or_ps(_mm_and_ps(_mm_castsi128_ps(mask), _mm_castsi128_ps(b)), _mm_andnot_ps(_mm_castsi128_ps(mask), _mm_castsi128_ps(a))));
#else
	return _mm_blendv_epi8(a, b, mask);
#endif
}

// Float blend where 'mask' is a full lane mask (each word all-ones or zero).
CPPSPMD_FORCE_INLINE __m128 blendv_mask_ps(__m128 a, __m128 b, __m128 mask)
{
#if CPPSPMD_SSE2
	// We know it's a mask, so we can just emulate the blend.
	return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a));
#else
	return _mm_blendv_ps(a, b, mask);
#endif
}

// Float blend where only the mask's sign bits are meaningful (matches _mm_blendv_ps semantics).
CPPSPMD_FORCE_INLINE __m128 blendv_ps(__m128 a, __m128 b, __m128 mask)
{
#if CPPSPMD_SSE2
	// Input is not a mask, but MSB bits - so emulate _mm_blendv_ps() by replicating bit 31.
	mask = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(mask), 31));
	return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a));
#else
	return _mm_blendv_ps(a, b, mask);
#endif
}

// Integer wrappers over the two float blends above.
CPPSPMD_FORCE_INLINE __m128i blendv_mask_epi32(__m128i a, __m128i b, __m128i mask)
{
	return _mm_castps_si128(blendv_mask_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask)));
}

CPPSPMD_FORCE_INLINE __m128i blendv_epi32(__m128i a, __m128i b, __m128i mask)
{
	return _mm_castps_si128(blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask)));
}

#if CPPSPMD_SSE2
// Per-lane extraction of 32-bit ints (SSE2: shuffle the wanted lane to slot 0, then move to GPR).
CPPSPMD_FORCE_INLINE int extract_x(const __m128i& vec) { return _mm_cvtsi128_si32(vec); }
CPPSPMD_FORCE_INLINE int extract_y(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0x55)); }
CPPSPMD_FORCE_INLINE int extract_z(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0xAA)); }
CPPSPMD_FORCE_INLINE int extract_w(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0xFF)); }

// Returns float bits as int, to emulate _mm_extract_ps()
CPPSPMD_FORCE_INLINE int extract_ps_x(const __m128& vec) { float f = _mm_cvtss_f32(vec); return *(const int*)&f; }
CPPSPMD_FORCE_INLINE int extract_ps_y(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0x55)); return *(const int*)&f; }
CPPSPMD_FORCE_INLINE int extract_ps_z(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xAA)); return *(const int*)&f; }
CPPSPMD_FORCE_INLINE int extract_ps_w(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xFF)); return *(const int*)&f; }

// Returns floats
CPPSPMD_FORCE_INLINE float extractf_ps_x(const __m128& vec) { return _mm_cvtss_f32(vec); }
CPPSPMD_FORCE_INLINE float extractf_ps_y(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0x55)); }
CPPSPMD_FORCE_INLINE float extractf_ps_z(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xAA)); }
CPPSPMD_FORCE_INLINE float extractf_ps_w(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xFF)); }
#else
// SSE4.1 lane extraction.
CPPSPMD_FORCE_INLINE int extract_x(const __m128i& vec) { return _mm_extract_epi32(vec, 0); }
CPPSPMD_FORCE_INLINE int extract_y(const __m128i& vec) { return _mm_extract_epi32(vec, 1); }
CPPSPMD_FORCE_INLINE int extract_z(const __m128i& vec) { return _mm_extract_epi32(vec, 2); }
CPPSPMD_FORCE_INLINE int extract_w(const __m128i& vec) { return _mm_extract_epi32(vec, 3); }

// Returns float bits as int
CPPSPMD_FORCE_INLINE int extract_ps_x(const __m128& vec) { return _mm_extract_ps(vec, 0); }
CPPSPMD_FORCE_INLINE int extract_ps_y(const __m128& vec) { return _mm_extract_ps(vec, 1); }
CPPSPMD_FORCE_INLINE int extract_ps_z(const __m128& vec) { return _mm_extract_ps(vec, 2); }
CPPSPMD_FORCE_INLINE int extract_ps_w(const __m128& vec) { return _mm_extract_ps(vec, 3); }

// Returns floats
CPPSPMD_FORCE_INLINE float extractf_ps_x(const __m128& vec) { int v = extract_ps_x(vec); return *(const float*)&v; }
CPPSPMD_FORCE_INLINE float extractf_ps_y(const __m128& vec) { int v = extract_ps_y(vec); return *(const float*)&v; }
CPPSPMD_FORCE_INLINE float extractf_ps_z(const __m128& vec) { int v = extract_ps_z(vec); return *(const float*)&v; }
CPPSPMD_FORCE_INLINE float extractf_ps_w(const __m128& vec) { int v = extract_ps_w(vec); return *(const float*)&v; }
#endif

#if CPPSPMD_SSE2
// SSE2 has no 32-bit insert, so each 32-bit insert is done as two 16-bit inserts (low then high half).
CPPSPMD_FORCE_INLINE __m128i insert_x(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 0), (uint32_t)v >> 16U, 1); }
CPPSPMD_FORCE_INLINE __m128i insert_y(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 2), (uint32_t)v >> 16U, 3); }
CPPSPMD_FORCE_INLINE __m128i insert_z(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 4), (uint32_t)v >> 16U, 5); }
CPPSPMD_FORCE_INLINE __m128i insert_w(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 6), (uint32_t)v >> 16U, 7); }
#else
CPPSPMD_FORCE_INLINE __m128i insert_x(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 0); }
CPPSPMD_FORCE_INLINE __m128i insert_y(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 1); }
CPPSPMD_FORCE_INLINE __m128i insert_z(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 2); }
CPPSPMD_FORCE_INLINE __m128i insert_w(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 3); }
#endif

#if CPPSPMD_SSE2
// Scalar emulation of SSSE3 _mm_shuffle_epi8: byte table lookup with zeroing on negative selectors.
inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b)
{
	// Just emulate _mm_shuffle_epi8. This is very slow, but what else can we do?
	CPPSPMD_ALIGN(16) uint8_t av[16];
	_mm_store_si128((__m128i*)av, a);

	// Keep only the low 4 bits of each selector byte (valid index range 0..15).
	CPPSPMD_ALIGN(16) uint8_t bvi[16];
	_mm_store_ps((float*)bvi, _mm_and_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(_mm_set1_epi32(0x0F0F0F0F))));

	CPPSPMD_ALIGN(16) uint8_t result[16];

	result[0] = av[bvi[0]];
	result[1] = av[bvi[1]];
	result[2] = av[bvi[2]];
	result[3] = av[bvi[3]];

	result[4] = av[bvi[4]];
	result[5] = av[bvi[5]];
	result[6] = av[bvi[6]];
	result[7] = av[bvi[7]];

	result[8] = av[bvi[8]];
	result[9] = av[bvi[9]];
	result[10] = av[bvi[10]];
	result[11] = av[bvi[11]];

	result[12] = av[bvi[12]];
	result[13] = av[bvi[13]];
	result[14] = av[bvi[14]];
	result[15] = av[bvi[15]];

	// Zero output bytes whose selector byte had its sign bit set (matches _mm_shuffle_epi8).
	return _mm_andnot_si128(_mm_cmplt_epi8(b, _mm_setzero_si128()), _mm_load_si128((__m128i*)result));
}
#else
CPPSPMD_FORCE_INLINE __m128i shuffle_epi8(const __m128i& a, const __m128i& b)
{
	return _mm_shuffle_epi8(a, b);
}
#endif

#if CPPSPMD_SSE2
// SSE2 min/max emulation: compare, then blend the winner per lane.
CPPSPMD_FORCE_INLINE __m128i min_epi32(__m128i a, __m128i b)
{
	return blendv_mask_epi32(b, a, _mm_cmplt_epi32(a, b));
}
CPPSPMD_FORCE_INLINE __m128i max_epi32(__m128i a, __m128i b)
{
	return blendv_mask_epi32(b, a, _mm_cmpgt_epi32(a, b));
}
// Unsigned compares: bias both operands by 0x80000000 so the signed compare orders them correctly.
CPPSPMD_FORCE_INLINE __m128i min_epu32(__m128i a, __m128i b)
{
	__m128i n = _mm_set1_epi32(0x80000000);
	__m128i ac = _mm_add_epi32(a, n);
	__m128i bc = _mm_add_epi32(b, n);
	return blendv_mask_epi32(b, a, _mm_cmplt_epi32(ac, bc));
}
CPPSPMD_FORCE_INLINE __m128i max_epu32(__m128i a, __m128i b)
{
	__m128i n = _mm_set1_epi32(0x80000000);
	__m128i ac = _mm_add_epi32(a, n);
	__m128i bc = _mm_add_epi32(b, n);
	return blendv_mask_epi32(b, a, _mm_cmpgt_epi32(ac, bc));
}
#else
CPPSPMD_FORCE_INLINE __m128i min_epi32(__m128i a, __m128i b)
{
	return _mm_min_epi32(a, b);
}
CPPSPMD_FORCE_INLINE __m128i max_epi32(__m128i a, __m128i b)
{
	return _mm_max_epi32(a, b);
}
CPPSPMD_FORCE_INLINE __m128i min_epu32(__m128i a, __m128i b)
{
	return _mm_min_epu32(a, b);
}
CPPSPMD_FORCE_INLINE __m128i max_epu32(__m128i a, __m128i b)
{
	return _mm_max_epu32(a, b);
}
#endif

#if CPPSPMD_SSE2
// |a| per lane via the xor-with-sign/subtract-sign identity.
CPPSPMD_FORCE_INLINE __m128i abs_epi32(__m128i a)
{
	__m128i sign_mask = _mm_srai_epi32(a, 31);
	return _mm_sub_epi32(_mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(sign_mask))), sign_mask);
}
#else
CPPSPMD_FORCE_INLINE __m128i abs_epi32(__m128i a)
{
	return _mm_abs_epi32(a);
}
#endif

#if CPPSPMD_SSE2
// 32x32->low 32 multiply built from two 32x32->64 even-lane multiplies, re-interleaved.
CPPSPMD_FORCE_INLINE __m128i mullo_epi32(__m128i a, __m128i b)
{
	__m128i tmp1 = _mm_mul_epu32(a, b);
	__m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
	return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0)));
}
#else
CPPSPMD_FORCE_INLINE __m128i mullo_epi32(__m128i a, __m128i b)
{
	return _mm_mullo_epi32(a, b);
}
#endif

// Unsigned 32x32->high 32 multiply (same even/odd split as mullo, keeping the upper words).
CPPSPMD_FORCE_INLINE __m128i mulhi_epu32(__m128i a, __m128i b)
{
	__m128i tmp1 = _mm_mul_epu32(a, b);
	__m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
	return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 3, 1)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 3, 1)));
}

#if CPPSPMD_SSE2
// Load 4 packed bytes and widen each to a 32-bit lane.
inline __m128i load_rgba32(const void* p)
{
	__m128i xmm = _mm_cvtsi32_si128(*(const int*)p);
	xmm = _mm_unpacklo_epi8(xmm, _mm_setzero_si128());
	xmm = _mm_unpacklo_epi16(xmm, _mm_setzero_si128());
	return xmm;
}
#else
inline __m128i load_rgba32(const void* p)
{
	return _mm_cvtepu8_epi32(_mm_castps_si128(_mm_load_ss((const float*)p)));
}
#endif

// Transpose the 4x4 matrix of 32-bit values in rows r0..r3 into columns x..w.
inline void transpose4x4(__m128i& x, __m128i& y, __m128i& z, __m128i& w, const __m128i& r0, const __m128i& r1, const __m128i& r2, const __m128i& r3)
{
	__m128i t0 = _mm_unpacklo_epi32(r0, r1);
	__m128i t1 = _mm_unpacklo_epi32(r2, r3);
	__m128i t2 = _mm_unpackhi_epi32(r0, r1);
	__m128i t3 = _mm_unpackhi_epi32(r2, r3);
	x = _mm_unpacklo_epi64(t0, t1);
	y = _mm_unpackhi_epi64(t0, t1);
	z = _mm_unpacklo_epi64(t2, t3);
	w = _mm_unpackhi_epi64(t2, t3);
}

// movemask value when all 4 lanes are active.
const uint32_t ALL_ON_MOVEMASK = 0xF;

struct spmd_kernel
{
	struct vint;
	struct lint;
	struct vbool;
	struct vfloat;

	typedef int int_t;
	typedef vint vint_t;
	typedef lint lint_t;

	// Exec mask
	struct exec_mask
	{
		__m128i m_mask;

		exec_mask() = default;

		CPPSPMD_FORCE_INLINE explicit exec_mask(const vbool& b);
		CPPSPMD_FORCE_INLINE explicit exec_mask(const __m128i& mask) : m_mask(mask) { }

		// Make exactly one lane active.
		CPPSPMD_FORCE_INLINE void enable_lane(uint32_t lane) { m_mask = _mm_load_si128((const __m128i *)&g_lane_masks_128[lane][0]); }

		static CPPSPMD_FORCE_INLINE exec_mask all_on() { return exec_mask{ _mm_load_si128((const __m128i*)g_allones_128) }; }
		static CPPSPMD_FORCE_INLINE exec_mask all_off() { return exec_mask{ _mm_setzero_si128() }; }

		// One bit per lane (bit i set = lane i active).
		CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return _mm_movemask_ps(_mm_castsi128_ps(m_mask)); }
	};

	friend CPPSPMD_FORCE_INLINE bool all(const exec_mask& e);
	friend CPPSPMD_FORCE_INLINE bool any(const exec_mask& e);

	CPPSPMD_FORCE_INLINE bool spmd_all() const { return all(m_exec); }
	CPPSPMD_FORCE_INLINE bool spmd_any() const { return any(m_exec); }
	CPPSPMD_FORCE_INLINE bool spmd_none() { return !any(m_exec); }

	// true if cond is true for all active lanes - false if no active lanes
	CPPSPMD_FORCE_INLINE bool spmd_all(const vbool& e) { uint32_t m = m_exec.get_movemask(); return (m != 0) && ((exec_mask(e) & m_exec).get_movemask() == m); }
	// true if cond is true for any active lanes
	CPPSPMD_FORCE_INLINE bool spmd_any(const vbool& e) { return (exec_mask(e) & m_exec).get_movemask() != 0; }
	CPPSPMD_FORCE_INLINE bool spmd_none(const vbool& e) { return !spmd_any(e); }

	friend CPPSPMD_FORCE_INLINE exec_mask operator^ (const exec_mask& a, const exec_mask& b);
	friend CPPSPMD_FORCE_INLINE exec_mask operator& (const exec_mask& a, const exec_mask& b);
	friend CPPSPMD_FORCE_INLINE exec_mask operator| (const exec_mask& a, const exec_mask& b);

	// m_exec: current lane mask; m_kernel_exec: mask at kernel entry; m_continue_mask: lanes that hit spmd_continue.
	exec_mask m_exec;
	exec_mask m_kernel_exec;
	exec_mask m_continue_mask;
#ifdef _DEBUG
	bool m_in_loop;
#endif

	CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return m_exec.get_movemask(); }

	void init(const exec_mask& kernel_exec);

	// Varying bool

	struct vbool
	{
		// All-ones word = true lane, zero word = false lane.
		__m128i m_value;

		vbool() = default;

		CPPSPMD_FORCE_INLINE vbool(bool value) : m_value(_mm_set1_epi32(value ? UINT32_MAX : 0)) { }

		CPPSPMD_FORCE_INLINE explicit vbool(const __m128i& value) : m_value(value) { }

		CPPSPMD_FORCE_INLINE explicit operator vfloat() const;
		CPPSPMD_FORCE_INLINE explicit operator vint() const;

	private:
		//vbool& operator=(const vbool&);
	};

	friend vbool operator!(const vbool& v);

	// Masked store: only active lanes of dst are updated.
	CPPSPMD_FORCE_INLINE vbool& store(vbool& dst, const vbool& src)
	{
		dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask);
		return dst;
	}

	// Unconditional store to all lanes, ignoring the exec mask.
	CPPSPMD_FORCE_INLINE vbool& store_all(vbool& dst, const vbool& src)
	{
		dst.m_value = src.m_value;
		return dst;
	}

	// Varying float
	struct vfloat
	{
		__m128 m_value;

		vfloat() = default;

		CPPSPMD_FORCE_INLINE explicit vfloat(const __m128& v) : m_value(v) { }

		CPPSPMD_FORCE_INLINE vfloat(float value) : m_value(_mm_set1_ps(value)) { }

		CPPSPMD_FORCE_INLINE explicit vfloat(int value) : m_value(_mm_set1_ps((float)value)) { }

	private:
		//vfloat& operator=(const vfloat&);
	};

	// Masked store (lvalue and rvalue destination overloads).
	CPPSPMD_FORCE_INLINE vfloat& store(vfloat& dst, const vfloat& src)
	{
		dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask));
		return dst;
	}

	CPPSPMD_FORCE_INLINE vfloat& store(vfloat&& dst, const vfloat& src)
	{
		dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask));
		return dst;
	}

	CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat& dst, const vfloat& src)
	{
		dst.m_value = src.m_value;
		return dst;
	}

	CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat&& dst, const vfloat& src)
	{
		dst.m_value = src.m_value;
		return dst;
	}

	// Linear ref to floats
	struct float_lref
	{
		float* m_pValue;

	private:
		//float_lref& operator=(const float_lref&);
	};

	// Masked linear store of 4 floats; fast path does one unaligned store when all lanes are on.
	CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref& dst, const vfloat& src)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		if (mask == ALL_ON_MOVEMASK)
			_mm_storeu_ps(dst.m_pValue, src.m_value);
		else
			_mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask)));
		return dst;
	}

	CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref&& dst, const vfloat& src)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		if (mask == ALL_ON_MOVEMASK)
			_mm_storeu_ps(dst.m_pValue, src.m_value);
		else
			_mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask)));
		return dst;
	}

	CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref& dst, const vfloat& src)
	{
		_mm_storeu_ps(dst.m_pValue, src.m_value);
		return dst;
	}

	CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref&& dst, const vfloat& src)
	{
		_mm_storeu_ps(dst.m_pValue, src.m_value);
		return dst;
	}

	// Masked linear load; inactive lanes are forced to zero by the AND with the exec mask.
	CPPSPMD_FORCE_INLINE vfloat load(const float_lref& src)
	{
		return vfloat{ _mm_and_ps(_mm_loadu_ps(src.m_pValue), _mm_castsi128_ps(m_exec.m_mask)) };
	}

	// Varying ref to floats
	struct float_vref
	{
		__m128i m_vindex;
		float* m_pValue;

	private:
		//float_vref& operator=(const float_vref&);
	};

	// Varying ref to varying float
	struct vfloat_vref
	{
		__m128i m_vindex;
		vfloat* m_pValue;

	private:
		//vfloat_vref& operator=(const vfloat_vref&);
	};

	// Varying ref to varying int
	struct vint_vref
	{
		__m128i m_vindex;
		vint* m_pValue;

	private:
		//vint_vref& operator=(const vint_vref&);
	};

	CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src);
	CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref&& dst, const vfloat& src);

	CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref& dst, const vfloat& src);
	CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref&& dst, const vfloat& src);

	// Masked gather of floats through per-lane indices; inactive lanes read nothing and come back 0.
	CPPSPMD_FORCE_INLINE vfloat load(const float_vref& src)
	{
		CPPSPMD_ALIGN(16) int vindex[4];
		_mm_store_si128((__m128i *)vindex, src.m_vindex);

		CPPSPMD_ALIGN(16) float loaded[4];

		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		for (int i = 0; i < 4; i++)
		{
			if (mask & (1 << i))
				loaded[i] = src.m_pValue[vindex[i]];
		}
		// Inactive lanes of 'loaded' are uninitialized here but are zeroed by the mask AND.
		return vfloat{ _mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)loaded)) };
	}

	// Unmasked gather: all 4 indices are dereferenced.
	CPPSPMD_FORCE_INLINE vfloat load_all(const float_vref& src)
	{
		CPPSPMD_ALIGN(16) int vindex[4];
		_mm_store_si128((__m128i *)vindex, src.m_vindex);

		CPPSPMD_ALIGN(16) float loaded[4];

		for (int i = 0; i < 4; i++)
			loaded[i] = src.m_pValue[vindex[i]];
		return vfloat{ _mm_load_ps((const float*)loaded) };
	}

	// Linear ref to ints
	struct int_lref
	{
		int* m_pValue;

	private:
		//int_lref& operator=(const int_lref&);
	};

	// Masked linear store of 4 ints (scalar fallback when partially masked).
	CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		if (mask == ALL_ON_MOVEMASK)
		{
			_mm_storeu_si128((__m128i *)dst.m_pValue, src.m_value);
		}
		else
		{
			CPPSPMD_ALIGN(16) int stored[4];
			_mm_store_si128((__m128i *)stored, src.m_value);

			for (int i = 0; i < 4; i++)
			{
				if (mask & (1 << i))
					dst.m_pValue[i] = stored[i];
			}
		}
		return dst;
	}

	// Masked linear load; inactive lanes zeroed.
	CPPSPMD_FORCE_INLINE vint load(const int_lref& src)
	{
		__m128i v = _mm_loadu_si128((const __m128i*)src.m_pValue);

		v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask)));

		return vint{ v };
	}

	// Linear ref to int16's
	struct int16_lref
	{
		int16_t* m_pValue;

	private:
		//int16_lref& operator=(const int16_lref&);
	};

	// Masked store, truncating each active 32-bit lane to int16.
	CPPSPMD_FORCE_INLINE const int16_lref& store(const int16_lref& dst, const vint& src)
	{
		CPPSPMD_ALIGN(16) int stored[4];
		_mm_store_si128((__m128i *)stored, src.m_value);

		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		for (int i = 0; i < 4; i++)
		{
			if (mask & (1 << i))
				dst.m_pValue[i] = static_cast<int16_t>(stored[i]);
		}
		return dst;
	}

	CPPSPMD_FORCE_INLINE const int16_lref& store_all(const int16_lref& dst, const vint& src)
	{
		CPPSPMD_ALIGN(16) int stored[4];
		_mm_store_si128((__m128i *)stored, src.m_value);

		for (int i = 0; i < 4; i++)
			dst.m_pValue[i] = static_cast<int16_t>(stored[i]);
		return dst;
	}

	// Masked load with sign-extension from int16 to 32-bit lanes; inactive lanes zeroed.
	CPPSPMD_FORCE_INLINE vint load(const int16_lref& src)
	{
		CPPSPMD_ALIGN(16) int values[4];

		for (int i = 0; i < 4; i++)
			values[i] = static_cast<int>(src.m_pValue[i]);

		__m128i t = _mm_load_si128( (const __m128i *)values );

		return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps( t ), _mm_castsi128_ps(m_exec.m_mask))) };
	}

	CPPSPMD_FORCE_INLINE vint load_all(const int16_lref& src)
	{
		CPPSPMD_ALIGN(16) int values[4];

		for (int i = 0; i < 4; i++)
			values[i] = static_cast<int>(src.m_pValue[i]);

		__m128i t = _mm_load_si128( (const __m128i *)values );

		return vint{ t };
	}

	// Linear ref to constant ints
	struct cint_lref
	{
		const int* m_pValue;

	private:
		//cint_lref& operator=(const cint_lref&);
	};

	CPPSPMD_FORCE_INLINE vint load(const cint_lref& src)
	{
		__m128i v = _mm_loadu_si128((const __m128i *)src.m_pValue);
		v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask)));
		return vint{ v };
	}

	CPPSPMD_FORCE_INLINE vint load_all(const cint_lref& src)
	{
		return vint{ _mm_loadu_si128((const __m128i *)src.m_pValue) };
	}

	// Varying ref to ints
	struct int_vref
	{
		__m128i m_vindex;
		int* m_pValue;

	private:
		//int_vref& operator=(const int_vref&);
	};

	// Varying ref to constant ints
	struct cint_vref
	{
		__m128i m_vindex;
		const int* m_pValue;

	private:
		//cint_vref& operator=(const cint_vref&);
	};

	// Varying int
	struct vint
	{
		__m128i m_value;

		vint() = default;

		CPPSPMD_FORCE_INLINE explicit vint(const __m128i& value) : m_value(value) { }

		CPPSPMD_FORCE_INLINE explicit vint(const lint &other) : m_value(other.m_value) { }

		CPPSPMD_FORCE_INLINE vint& operator=(const lint& other) { m_value = other.m_value; return *this; }

		CPPSPMD_FORCE_INLINE vint(int value) : m_value(_mm_set1_epi32(value)) { }

		CPPSPMD_FORCE_INLINE explicit vint(float value) : m_value(_mm_set1_epi32((int)value)) { }

		// Truncating float->int conversion per lane.
		CPPSPMD_FORCE_INLINE explicit vint(const vfloat& other) : m_value(_mm_cvttps_epi32(other.m_value)) { }

		// Nonzero lane -> all-ones, zero lane -> zero.
		CPPSPMD_FORCE_INLINE explicit operator vbool() const
		{
			return vbool{ _mm_xor_si128( _mm_load_si128((const __m128i*)g_allones_128), _mm_cmpeq_epi32(m_value, _mm_setzero_si128())) };
		}

		CPPSPMD_FORCE_INLINE explicit operator vfloat() const
		{
			return vfloat{ _mm_cvtepi32_ps(m_value) };
		}

		// Indexing a pointer with a vint produces a varying-reference (gather/scatter handle).
		CPPSPMD_FORCE_INLINE int_vref operator[](int* ptr) const
		{
			return int_vref{ m_value, ptr };
		}

		CPPSPMD_FORCE_INLINE cint_vref operator[](const int* ptr) const
		{
			return cint_vref{ m_value, ptr };
		}

		CPPSPMD_FORCE_INLINE float_vref operator[](float* ptr) const
		{
			return float_vref{ m_value, ptr };
		}

		CPPSPMD_FORCE_INLINE vfloat_vref operator[](vfloat* ptr) const
		{
			return vfloat_vref{ m_value, ptr };
		}

		CPPSPMD_FORCE_INLINE vint_vref operator[](vint* ptr) const
		{
			return vint_vref{ m_value, ptr };
		}

	private:
		//vint& operator=(const vint&);
	};

	// Load/store linear int
	CPPSPMD_FORCE_INLINE void storeu_linear(int *pDst, const vint& src)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		if (mask == ALL_ON_MOVEMASK)
			_mm_storeu_si128((__m128i *)pDst, src.m_value);
		else
		{
			if (mask & 1) pDst[0] = extract_x(src.m_value);
			if (mask & 2) pDst[1] = extract_y(src.m_value);
			if (mask & 4) pDst[2] = extract_z(src.m_value);
			if (mask & 8) pDst[3] = extract_w(src.m_value);
		}
	}

	CPPSPMD_FORCE_INLINE void storeu_linear_all(int *pDst, const vint& src)
	{
		_mm_storeu_si128((__m128i*)pDst, src.m_value);
	}

	// Aligned variant - pDst must be 16-byte aligned.
	CPPSPMD_FORCE_INLINE void store_linear_all(int *pDst, const vint& src)
	{
		_mm_store_si128((__m128i*)pDst, src.m_value);
	}

	CPPSPMD_FORCE_INLINE vint loadu_linear(const int *pSrc)
	{
		__m128i v = _mm_loadu_si128((const __m128i*)pSrc);

		v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask)));

		return vint{ v };
	}

	CPPSPMD_FORCE_INLINE vint loadu_linear_all(const int *pSrc)
	{
		return vint{ _mm_loadu_si128((__m128i*)pSrc) };
	}

	CPPSPMD_FORCE_INLINE vint load_linear_all(const int *pSrc)
	{
		return vint{ _mm_load_si128((__m128i*)pSrc) };
	}

	// Load/store linear float
	CPPSPMD_FORCE_INLINE void storeu_linear(float *pDst, const vfloat& src)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		if (mask == ALL_ON_MOVEMASK)
			_mm_storeu_ps((float*)pDst, src.m_value);
		else
		{
			// Partial store: move each active lane's raw bits out through integer writes.
			int *pDstI = (int *)pDst;
			if (mask & 1) pDstI[0] = extract_ps_x(src.m_value);
			if (mask & 2) pDstI[1] = extract_ps_y(src.m_value);
			if (mask & 4) pDstI[2] = extract_ps_z(src.m_value);
			if (mask & 8) pDstI[3] = extract_ps_w(src.m_value);
		}
	}

	CPPSPMD_FORCE_INLINE void storeu_linear_all(float *pDst, const vfloat& src)
	{
		_mm_storeu_ps((float*)pDst, src.m_value);
	}

	CPPSPMD_FORCE_INLINE void store_linear_all(float *pDst, const vfloat& src)
	{
		_mm_store_ps((float*)pDst, src.m_value);
	}

	CPPSPMD_FORCE_INLINE vfloat loadu_linear(const float *pSrc)
	{
		__m128 v = _mm_loadu_ps((const float*)pSrc);

		v = _mm_and_ps(v, _mm_castsi128_ps(m_exec.m_mask));

		return vfloat{ v };
	}

	CPPSPMD_FORCE_INLINE vfloat loadu_linear_all(const float *pSrc)
	{
		return vfloat{ _mm_loadu_ps((float*)pSrc) };
	}

	CPPSPMD_FORCE_INLINE vfloat load_linear_all(const float *pSrc)
	{
		return vfloat{ _mm_load_ps((float*)pSrc) };
	}

	CPPSPMD_FORCE_INLINE vint& store(vint& dst, const vint& src)
	{
		dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask);
		return dst;
	}

	// Masked scatter of ints through per-lane indices.
	CPPSPMD_FORCE_INLINE const int_vref& store(const int_vref& dst, const vint& src)
	{
		CPPSPMD_ALIGN(16) int vindex[4];
		_mm_store_si128((__m128i*)vindex, dst.m_vindex);

		CPPSPMD_ALIGN(16) int stored[4];
		_mm_store_si128((__m128i*)stored, src.m_value);

		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		for (int i = 0; i < 4; i++)
		{
			if (mask & (1 << i))
				dst.m_pValue[vindex[i]] = stored[i];
		}
		return dst;
	}

	CPPSPMD_FORCE_INLINE vint& store_all(vint& dst, const vint& src)
	{
		dst.m_value = src.m_value;
		return dst;
	}

	// Unmasked scatter: all 4 lanes are written.
	CPPSPMD_FORCE_INLINE const int_vref& store_all(const int_vref& dst, const vint& src)
	{
		CPPSPMD_ALIGN(16) int vindex[4];
		_mm_store_si128((__m128i*)vindex, dst.m_vindex);

		CPPSPMD_ALIGN(16) int stored[4];
		_mm_store_si128((__m128i*)stored, src.m_value);

		for (int i = 0; i < 4; i++)
			dst.m_pValue[vindex[i]] = stored[i];

		return dst;
	}

	// Masked gather of ints; inactive lanes come back 0 (mask AND clears their garbage).
	CPPSPMD_FORCE_INLINE vint load(const int_vref& src)
	{
		CPPSPMD_ALIGN(16) int values[4];

		CPPSPMD_ALIGN(16) int indices[4];
		_mm_store_si128((__m128i *)indices, src.m_vindex);

		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		for (int i = 0; i < 4; i++)
		{
			if (mask & (1 << i))
				values[i] = src.m_pValue[indices[i]];
		}

		return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) };
	}

	CPPSPMD_FORCE_INLINE vint load_all(const int_vref& src)
	{
		CPPSPMD_ALIGN(16) int values[4];

		CPPSPMD_ALIGN(16) int indices[4];
		_mm_store_si128((__m128i *)indices, src.m_vindex);

		for (int i = 0; i < 4; i++)
			values[i] = src.m_pValue[indices[i]];

		return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) };
	}

	CPPSPMD_FORCE_INLINE vint load(const cint_vref& src)
	{
		CPPSPMD_ALIGN(16) int values[4];

		CPPSPMD_ALIGN(16) int indices[4];
		_mm_store_si128((__m128i *)indices, src.m_vindex);

		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));
		for (int i = 0; i < 4; i++)
		{
			if (mask & (1 << i))
				values[i] = src.m_pValue[indices[i]];
		}

		return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) };
	}

	CPPSPMD_FORCE_INLINE vint load_all(const cint_vref& src)
	{
		CPPSPMD_ALIGN(16) int values[4];

		CPPSPMD_ALIGN(16) int indices[4];
		_mm_store_si128((__m128i *)indices, src.m_vindex);

		for (int i = 0; i < 4; i++)
			values[i] = src.m_pValue[indices[i]];

		return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) };
	}

	// Unmasked gather treating m_pValue as a byte base; each lane's vindex is a byte offset.
	// NOTE(review): each lane reads a full 32-bit int from that byte offset (not a single byte).
	CPPSPMD_FORCE_INLINE vint load_bytes_all(const cint_vref& src)
	{
		__m128i v0_l;

		const uint8_t* pSrc = (const uint8_t*)src.m_pValue;
		v0_l = insert_x(_mm_undefined_si128(), ((int*)(pSrc + extract_x(src.m_vindex)))[0]);
		v0_l = insert_y(v0_l, ((int*)(pSrc + extract_y(src.m_vindex)))[0]);
		v0_l = insert_z(v0_l, ((int*)(pSrc + extract_z(src.m_vindex)))[0]);
		v0_l = insert_w(v0_l, ((int*)(pSrc + extract_w(src.m_vindex)))[0]);

		return vint{ v0_l };
	}

	// Unmasked gather of 16-bit words; vindex is scaled by 2 bytes per element.
	CPPSPMD_FORCE_INLINE vint load_words_all(const cint_vref& src)
	{
		__m128i v0_l;

		const uint8_t* pSrc = (const uint8_t*)src.m_pValue;
		v0_l = insert_x(_mm_undefined_si128(), ((int16_t*)(pSrc + 2 * extract_x(src.m_vindex)))[0]);
		v0_l = insert_y(v0_l, ((int16_t*)(pSrc + 2 * extract_y(src.m_vindex)))[0]);
		v0_l = insert_z(v0_l, ((int16_t*)(pSrc + 2 * extract_z(src.m_vindex)))[0]);
		v0_l = insert_w(v0_l, ((int16_t*)(pSrc + 2 * extract_w(src.m_vindex)))[0]);

		return vint{ v0_l };
	}

	// Masked strided stores/loads: lane i accesses p[stride*i].
	CPPSPMD_FORCE_INLINE void store_strided(int *pDst, uint32_t stride, const vint &v)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

		if (mask & 1) pDst[0] = extract_x(v.m_value);
		if (mask & 2) pDst[stride] = extract_y(v.m_value);
		if (mask & 4) pDst[stride*2] = extract_z(v.m_value);
		if (mask & 8) pDst[stride*3] = extract_w(v.m_value);
	}

	CPPSPMD_FORCE_INLINE void store_strided(float *pDstF, uint32_t stride, const vfloat &v)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

		if (mask & 1) ((int *)pDstF)[0] = extract_ps_x(v.m_value);
		if (mask & 2) ((int *)pDstF)[stride] = extract_ps_y(v.m_value);
		if (mask & 4) ((int *)pDstF)[stride*2] = extract_ps_z(v.m_value);
		if (mask & 8) ((int *)pDstF)[stride*3] = extract_ps_w(v.m_value);
	}

	CPPSPMD_FORCE_INLINE void store_all_strided(int *pDst, uint32_t stride, const vint &v)
	{
		pDst[0] = extract_x(v.m_value);
		pDst[stride] = extract_y(v.m_value);
		pDst[stride*2] = extract_z(v.m_value);
		pDst[stride*3] = extract_w(v.m_value);
	}

	CPPSPMD_FORCE_INLINE void store_all_strided(float *pDstF, uint32_t stride, const vfloat &v)
	{
		((int *)pDstF)[0] = extract_ps_x(v.m_value);
		((int *)pDstF)[stride] = extract_ps_y(v.m_value);
		((int *)pDstF)[stride*2] = extract_ps_z(v.m_value);
		((int *)pDstF)[stride*3] = extract_ps_w(v.m_value);
	}

	CPPSPMD_FORCE_INLINE vint load_strided(const int *pSrc, uint32_t stride)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

#if CPPSPMD_SSE2
		CPPSPMD_ALIGN(16) int vals[4] = { 0, 0, 0, 0 };
		if (mask & 1) vals[0] = pSrc[0];
		if (mask & 2) vals[1] = pSrc[stride];
		if (mask & 4) vals[2] = pSrc[stride * 2];
		if (mask & 8) vals[3] = pSrc[stride * 3];
		return vint{ _mm_load_si128((__m128i*)vals) };
#else
		// SSE4.1: build the vector in-register with insert_ps (int bits moved as float bits).
		const float* pSrcF = (const float*)pSrc;
		__m128 v = _mm_setzero_ps();
		if (mask & 1) v = _mm_load_ss(pSrcF);
		if (mask & 2) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10);
		if (mask & 4) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 2 * stride), 0x20);
		if (mask & 8) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 3 * stride), 0x30);
		return vint{ _mm_castps_si128(v) };
#endif
	}

	CPPSPMD_FORCE_INLINE vfloat load_strided(const float *pSrc, uint32_t stride)
	{
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

#if CPPSPMD_SSE2
		CPPSPMD_ALIGN(16) float vals[4] = { 0, 0, 0, 0 };
		if (mask & 1) vals[0] = pSrc[0];
		if (mask & 2) vals[1] = pSrc[stride];
		if (mask & 4) vals[2] = pSrc[stride * 2];
		if (mask & 8) vals[3] = pSrc[stride * 3];
		return vfloat{ _mm_load_ps(vals) };
#else
		__m128 v = _mm_setzero_ps();
		if (mask & 1) v = _mm_load_ss(pSrc);
		if (mask & 2) v = _mm_insert_ps(v, _mm_load_ss(pSrc + stride), 0x10);
		if (mask & 4) v = _mm_insert_ps(v, _mm_load_ss(pSrc + 2 * stride), 0x20);
		if (mask & 8) v = _mm_insert_ps(v, _mm_load_ss(pSrc + 3 * stride), 0x30);
		return vfloat{ v };
#endif
	}

	CPPSPMD_FORCE_INLINE vint load_all_strided(const int *pSrc, uint32_t stride)
	{
#if CPPSPMD_SSE2
		CPPSPMD_ALIGN(16) int vals[4];
		vals[0] = pSrc[0];
		vals[1] = pSrc[stride];
		vals[2] = pSrc[stride * 2];
		vals[3] = pSrc[stride * 3];
		return vint{ _mm_load_si128((__m128i*)vals) };
#else
		const float* pSrcF = (const float*)pSrc;
		__m128 v = _mm_load_ss(pSrcF);
		v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10);
		v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 2 * stride), 0x20);
		v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 3 * stride), 0x30);
		return vint{ _mm_castps_si128(v) };
#endif
	}

	CPPSPMD_FORCE_INLINE vfloat load_all_strided(const float *pSrc, uint32_t stride)
	{
#if CPPSPMD_SSE2
		CPPSPMD_ALIGN(16) float vals[4];
		vals[0] = pSrc[0];
		vals[1] = pSrc[stride];
		vals[2] = pSrc[stride * 2];
		vals[3] = pSrc[stride * 3];
		return vfloat{ _mm_load_ps(vals) };
#else
		__m128 v = _mm_load_ss(pSrc);
		v = _mm_insert_ps(v, _mm_load_ss(pSrc + stride), 0x10);
		v = _mm_insert_ps(v, _mm_load_ss(pSrc + 2 * stride), 0x20);
		v = _mm_insert_ps(v, _mm_load_ss(pSrc + 3 * stride), 0x30);
		return vfloat{ v };
#endif
	}

	// Masked scatter into an array of vfloats: lane i writes word i of element vindex[i]
	// (each vfloat stores its 4 lanes contiguously, so [i] addresses lane i's slot).
	CPPSPMD_FORCE_INLINE const vfloat_vref& store(const vfloat_vref& dst, const vfloat& src)
	{
		// TODO: There's surely a better way
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

		if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(_mm_castps_si128(src.m_value));
		if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(_mm_castps_si128(src.m_value));
		if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(_mm_castps_si128(src.m_value));
		if (mask & 8) ((int *)(&dst.m_pValue[extract_w(dst.m_vindex)]))[3] = extract_w(_mm_castps_si128(src.m_value));

		return dst;
	}

	// Masked gather from an array of vfloats (inverse of the store above); inactive lanes are 0.
	CPPSPMD_FORCE_INLINE vfloat load(const vfloat_vref& src)
	{
		// TODO: There's surely a better way
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

		__m128i k = _mm_setzero_si128();

		if (mask & 1) k = insert_x(k, ((int *)(&src.m_pValue[extract_x(src.m_vindex)]))[0]);
		if (mask & 2) k = insert_y(k, ((int *)(&src.m_pValue[extract_y(src.m_vindex)]))[1]);
		if (mask & 4) k = insert_z(k, ((int *)(&src.m_pValue[extract_z(src.m_vindex)]))[2]);
		if (mask & 8) k = insert_w(k, ((int *)(&src.m_pValue[extract_w(src.m_vindex)]))[3]);

		return vfloat{ _mm_castsi128_ps(k) };
	}

	// Same lane-slot scatter/gather scheme for arrays of vints.
	CPPSPMD_FORCE_INLINE const vint_vref& store(const vint_vref& dst, const vint& src)
	{
		// TODO: There's surely a better way
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

		if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(src.m_value);
		if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(src.m_value);
		if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(src.m_value);
		if (mask & 8) ((int *)(&dst.m_pValue[extract_w(dst.m_vindex)]))[3] = extract_w(src.m_value);

		return dst;
	}

	CPPSPMD_FORCE_INLINE vint load(const vint_vref& src)
	{
		// TODO: There's surely a better way
		int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask));

		__m128i k = _mm_setzero_si128();

		if (mask & 1) k = insert_x(k, ((int *)(&src.m_pValue[extract_x(src.m_vindex)]))[0]);
		if (mask & 2) k = insert_y(k, ((int *)(&src.m_pValue[extract_y(src.m_vindex)]))[1]);
		if (mask & 4) k = insert_z(k, ((int *)(&src.m_pValue[extract_z(src.m_vindex)]))[2]);
		if (mask & 8) k = insert_w(k, ((int *)(&src.m_pValue[extract_w(src.m_vindex)]))[3]);

		return vint{ k };
	}

	CPPSPMD_FORCE_INLINE vint load_all(const vint_vref& src)
	{
		// TODO: There's surely a better way
		__m128i k = _mm_setzero_si128();

		k = insert_x(k, ((int*)(&src.m_pValue[extract_x(src.m_vindex)]))[0]);
		k = insert_y(k, ((int*)(&src.m_pValue[extract_y(src.m_vindex)]))[1]);
		k = insert_z(k, ((int*)(&src.m_pValue[extract_z(src.m_vindex)]))[2]);
		k = insert_w(k, ((int*)(&src.m_pValue[extract_w(src.m_vindex)]))[3]);

		return vint{ k };
	}

	// Linear integer
	struct lint
	{
		__m128i m_value;

		CPPSPMD_FORCE_INLINE explicit lint(__m128i value)
			: m_value(value)
		{ }

		CPPSPMD_FORCE_INLINE explicit operator vfloat() const
		{
			return vfloat{ _mm_cvtepi32_ps(m_value) };
		}

		CPPSPMD_FORCE_INLINE explicit operator vint() const
		{
			return vint{ m_value };
		}

		// Value of lane 0; indexing below uses it as the base offset for linear refs.
		CPPSPMD_FORCE_INLINE int get_first_value() const
		{
			return _mm_cvtsi128_si32(m_value);
		}

		CPPSPMD_FORCE_INLINE float_lref operator[](float* ptr) const
		{
			return float_lref{ ptr + get_first_value() };
		}

		CPPSPMD_FORCE_INLINE int_lref operator[](int* ptr) const
		{
			return int_lref{ ptr + get_first_value() };
		}

		CPPSPMD_FORCE_INLINE int16_lref operator[](int16_t* ptr) const
		{
			return int16_lref{ ptr + get_first_value() };
		}

		CPPSPMD_FORCE_INLINE cint_lref operator[](const int* ptr) const
		{
			return cint_lref{ ptr + get_first_value() };
		}

	private:
		//lint& operator=(const lint&);
	};

	CPPSPMD_FORCE_INLINE lint& store_all(lint& dst, const lint& src)
	{
		dst.m_value = src.m_value;
		return dst;
	}

	// Per-lane program index: lane i holds value i.
	const lint program_index = lint{ _mm_set_epi32( 3, 2, 1, 0 ) };

	// SPMD condition helpers

	template<typename IfBody>
	CPPSPMD_FORCE_INLINE void spmd_if(const vbool& cond, const IfBody& ifBody);

	CPPSPMD_FORCE_INLINE void spmd_if_break(const vbool& cond);

	// No breaks, continues, etc. allowed
	template<typename IfBody>
	CPPSPMD_FORCE_INLINE void spmd_sif(const vbool& cond, const IfBody& ifBody);

	// No breaks, continues, etc. allowed
	template<typename IfBody, typename ElseBody>
	CPPSPMD_FORCE_INLINE void spmd_sifelse(const vbool& cond, const IfBody& ifBody, const ElseBody &elseBody);

	template<typename IfBody, typename ElseBody>
	CPPSPMD_FORCE_INLINE void spmd_ifelse(const vbool& cond, const IfBody& ifBody, const ElseBody& elseBody);

	template<typename WhileCondBody, typename WhileBody>
	CPPSPMD_FORCE_INLINE void spmd_while(const WhileCondBody& whileCondBody, const WhileBody& whileBody);

	template<typename ForInitBody, typename ForCondBody, typename ForIncrBody, typename ForBody>
	CPPSPMD_FORCE_INLINE void spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody);

	template<typename ForeachBody>
	CPPSPMD_FORCE_INLINE void spmd_foreach(int begin, int end, const ForeachBody& foreachBody);

#ifdef _DEBUG
	CPPSPMD_FORCE_INLINE void check_masks();
#else
	CPPSPMD_FORCE_INLINE void check_masks() { }
#endif

	CPPSPMD_FORCE_INLINE void spmd_break();
	CPPSPMD_FORCE_INLINE void spmd_continue();

	CPPSPMD_FORCE_INLINE void spmd_return();

	template<typename UnmaskedBody>
	CPPSPMD_FORCE_INLINE void spmd_unmasked(const UnmaskedBody& unmaskedBody);

	template<typename SPMDKernel, typename... Args>
	//CPPSPMD_FORCE_INLINE decltype(auto) spmd_call(Args&&... args);
	CPPSPMD_FORCE_INLINE void spmd_call(Args&&... args);

	// Masked swaps: only active lanes are exchanged.
	CPPSPMD_FORCE_INLINE void swap(vint &a, vint &b) { vint temp = a; store(a, b); store(b, temp); }
	CPPSPMD_FORCE_INLINE void swap(vfloat &a, vfloat &b) { vfloat temp = a; store(a, b); store(b, temp); }
	CPPSPMD_FORCE_INLINE void swap(vbool &a, vbool &b) { vbool temp = a; store(a, b); store(b, temp); }

	// Horizontal sum over the active lanes (inactive lanes contribute 0).
	CPPSPMD_FORCE_INLINE float reduce_add(vfloat v)
	{
		__m128 k3210 = _mm_castsi128_ps(blendv_mask_epi32(_mm_setzero_si128(), _mm_castps_si128(v.m_value), m_exec.m_mask));
		__m128 temp = _mm_add_ps(_mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210);
		return _mm_cvtss_f32(_mm_add_ss(_mm_movehl_ps(temp, temp), temp));
	}

	CPPSPMD_FORCE_INLINE int reduce_add(vint v)
	{
		__m128i k3210 = blendv_mask_epi32(_mm_setzero_si128(), v.m_value, m_exec.m_mask);
		__m128i temp = _mm_add_epi32(_mm_shuffle_epi32(k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210);
		return extract_x(_mm_add_epi32(_mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(temp), _mm_castsi128_ps(temp))), temp));
	}

	#include "cppspmd_math_declares.h"

}; // struct spmd_kernel

using exec_mask = spmd_kernel::exec_mask;
using vint = spmd_kernel::vint;
using int_lref = spmd_kernel::int_lref;
using cint_vref = spmd_kernel::cint_vref;
using cint_lref = spmd_kernel::cint_lref;
using int_vref = spmd_kernel::int_vref;
using lint = spmd_kernel::lint;
using vbool = spmd_kernel::vbool;
using vfloat = spmd_kernel::vfloat;
using float_lref = spmd_kernel::float_lref;
using float_vref = spmd_kernel::float_vref;
using vfloat_vref = spmd_kernel::vfloat_vref;
using vint_vref = spmd_kernel::vint_vref;

// vbool -> vfloat: true lanes become 1.0f, false lanes 0.0f (mask ANDed with the all-ones float table).
CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vfloat() const
{
	return vfloat { _mm_and_ps( _mm_castsi128_ps(m_value), *(const __m128 *)g_onef_128 ) };
}

// Returns UINT32_MAX's for true, 0 for false. (Should it return 1's?)
+CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vint() const +{ + return vint { m_value }; +} + +CPPSPMD_FORCE_INLINE vbool operator!(const vbool& v) +{ + return vbool{ _mm_castps_si128(_mm_xor_ps(_mm_load_ps((const float*)g_allones_128), _mm_castsi128_ps(v.m_value))) }; +} + +CPPSPMD_FORCE_INLINE exec_mask::exec_mask(const vbool& b) { m_mask = b.m_value; } + +CPPSPMD_FORCE_INLINE exec_mask operator^(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_xor_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE exec_mask operator&(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_and_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE exec_mask operator|(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_or_si128(a.m_mask, b.m_mask) }; } + +CPPSPMD_FORCE_INLINE bool all(const exec_mask& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_mask)) == ALL_ON_MOVEMASK; } +CPPSPMD_FORCE_INLINE bool any(const exec_mask& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_mask)) != 0; } + +// Bad pattern - doesn't factor in the current exec mask. Prefer spmd_any() instead. 
+CPPSPMD_FORCE_INLINE bool all(const vbool& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_value)) == ALL_ON_MOVEMASK; } +CPPSPMD_FORCE_INLINE bool any(const vbool& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_value)) != 0; } + +CPPSPMD_FORCE_INLINE exec_mask andnot(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_andnot_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE vbool operator||(const vbool& a, const vbool& b) { return vbool{ _mm_or_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator&&(const vbool& a, const vbool& b) { return vbool{ _mm_and_si128(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat operator+(const vfloat& a, const vfloat& b) { return vfloat{ _mm_add_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, const vfloat& b) { return vfloat{ _mm_sub_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator+(float a, const vfloat& b) { return vfloat(a) + b; } +CPPSPMD_FORCE_INLINE vfloat operator+(const vfloat& a, float b) { return a + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, const vint& b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(const vint& a, const vfloat& b) { return vfloat(a) - b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, int b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(int a, const vfloat& b) { return vfloat(a) - b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, float b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(float a, const vfloat& b) { return vfloat(a) - b; } + +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, const vfloat& b) { return vfloat{ _mm_mul_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, float b) { return a * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(float a, const vfloat& b) { return vfloat(a) * b; } +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, int b) 
{ return a * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(int a, const vfloat& b) { return vfloat(a) * b; } + +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, const vfloat& b) { return vfloat{ _mm_div_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, int b) { return a / vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator/(int a, const vfloat& b) { return vfloat(a) / b; } +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, float b) { return a / vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator/(float a, const vfloat& b) { return vfloat(a) / b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& v) { return vfloat{ _mm_sub_ps(_mm_xor_ps(v.m_value, v.m_value), v.m_value) }; } + +CPPSPMD_FORCE_INLINE vbool operator==(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpeq_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator==(const vfloat& a, float b) { return a == vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator!=(const vfloat& a, const vfloat& b) { return !vbool{ _mm_castps_si128(_mm_cmpeq_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator!=(const vfloat& a, float b) { return a != vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator<(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmplt_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator<(const vfloat& a, float b) { return a < vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator>(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpgt_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator>(const vfloat& a, float b) { return a > vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator<=(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmple_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const vfloat& a, float b) { return a <= vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator>=(const vfloat& a, 
const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpge_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const vfloat& a, float b) { return a >= vfloat(b); } + +CPPSPMD_FORCE_INLINE vfloat spmd_ternaryf(const vbool& cond, const vfloat& a, const vfloat& b) { return vfloat{ blendv_mask_ps(b.m_value, a.m_value, _mm_castsi128_ps(cond.m_value)) }; } +CPPSPMD_FORCE_INLINE vint spmd_ternaryi(const vbool& cond, const vint& a, const vint& b) { return vint{ blendv_mask_epi32(b.m_value, a.m_value, cond.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat sqrt(const vfloat& v) { return vfloat{ _mm_sqrt_ps(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat abs(const vfloat& v) { return vfloat{ _mm_andnot_ps(_mm_set1_ps(-0.0f), v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat max(const vfloat& a, const vfloat& b) { return vfloat{ _mm_max_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat min(const vfloat& a, const vfloat& b) { return vfloat{ _mm_min_ps(a.m_value, b.m_value) }; } + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU) ); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvttps_epi32(a.m_value); + + __m128 af = _mm_cvtepi32_ps(ai); + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +CPPSPMD_FORCE_INLINE vfloat floor(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvtps_epi32(a.m_value); + __m128 af = _mm_cvtepi32_ps(ai); + __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmpgt_ps(af, a.m_value))); + + af = _mm_add_ps(af, changed); + + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +CPPSPMD_FORCE_INLINE vfloat 
ceil(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvtps_epi32(a.m_value); + __m128 af = _mm_cvtepi32_ps(ai); + __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmplt_ps(af, a.m_value))); + + af = _mm_sub_ps(af, changed); + + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +// We need to disable unsafe math optimizations for the key operations used for rounding to nearest. +// I wish there was a better way. +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__) +inline __m128 add_sub(__m128 a, __m128 b) __attribute__((optimize("-fno-unsafe-math-optimizations"))) +#elif defined(__clang__) +inline __m128 add_sub(__m128 a, __m128 b) __attribute__((optnone)) +#elif defined (_MSC_VER) +#pragma float_control(push) +#pragma float_control(precise, on) +inline __m128 add_sub(__m128 a, __m128 b) +#else +inline __m128 add_sub(__m128 a, __m128 b) +#endif +{ + return _mm_sub_ps(_mm_add_ps(a, b), b); +} + +#if defined (_MSC_VER) +#pragma float_control(pop) +#endif + +CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat& a) +{ + __m128i no_fract_fp_bits = _mm_castps_si128(_mm_set1_ps(8388608.0f)); + + __m128i sign_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x80000000U)); + __m128 force_int = _mm_castsi128_ps(_mm_or_si128(no_fract_fp_bits, sign_a)); + + // Can't use individual _mm_add_ps/_mm_sub_ps - this will be optimized out with /fp:fast by clang and probably other compilers. 
+ //__m128 temp1 = _mm_add_ps(a.m_value, force_int); + //__m128 temp2 = _mm_sub_ps(temp1, force_int); + __m128 temp2 = add_sub(a.m_value, force_int); + + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, no_fract_fp_bits); + return vfloat{ blendv_mask_ps(a.m_value, temp2, _mm_castsi128_ps(has_fractional)) }; +} + +#else +CPPSPMD_FORCE_INLINE vfloat floor(const vfloat& v) { return vfloat{ _mm_floor_ps(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat ceil(const vfloat& a) { return vfloat{ _mm_ceil_ps(a.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat &a) { return vfloat{ _mm_round_ps(a.m_value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC ) }; } +CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat &a) { return vfloat{ _mm_round_ps(a.m_value, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC ) }; } +#endif + +CPPSPMD_FORCE_INLINE vfloat frac(const vfloat& a) { return a - floor(a); } +CPPSPMD_FORCE_INLINE vfloat fmod(vfloat a, vfloat b) { vfloat c = frac(abs(a / b)) * abs(b); return spmd_ternaryf(a < 0, -c, c); } +CPPSPMD_FORCE_INLINE vfloat sign(const vfloat& a) { return spmd_ternaryf(a < 0.0f, 1.0f, 1.0f); } + +CPPSPMD_FORCE_INLINE vint max(const vint& a, const vint& b) { return vint{ max_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min(const vint& a, const vint& b) { return vint{ min_epi32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint maxu(const vint& a, const vint& b) { return vint{ max_epu32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint minu(const vint& a, const vint& b) { return vint{ min_epu32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint abs(const vint& v) { return vint{ abs_epi32(v.m_value) }; } + +CPPSPMD_FORCE_INLINE vint byteswap(const vint& v) { return vint{ shuffle_epi8(v.m_value, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)) }; } + +CPPSPMD_FORCE_INLINE vint cast_vfloat_to_vint(const vfloat& 
v) { return vint{ _mm_castps_si128(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat cast_vint_to_vfloat(const vint& v) { return vfloat{ _mm_castsi128_ps(v.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat clamp(const vfloat& v, const vfloat& a, const vfloat& b) +{ + return vfloat{ _mm_min_ps(b.m_value, _mm_max_ps(v.m_value, a.m_value) ) }; +} + +CPPSPMD_FORCE_INLINE vint clamp(const vint& v, const vint& a, const vint& b) +{ + return vint{ min_epi32(b.m_value, max_epi32(v.m_value, a.m_value) ) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfma(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_add_ps(_mm_mul_ps(a.m_value, b.m_value), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfms(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(_mm_mul_ps(a.m_value, b.m_value), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfnma(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(c.m_value, _mm_mul_ps(a.m_value, b.m_value)) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfnms(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(_mm_sub_ps(_mm_xor_ps(a.m_value, a.m_value), _mm_mul_ps(a.m_value, b.m_value)), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat lerp(const vfloat &x, const vfloat &y, const vfloat &s) { return vfma(y - x, s, x); } + +CPPSPMD_FORCE_INLINE lint operator+(int a, const lint& b) { return lint{ _mm_add_epi32(_mm_set1_epi32(a), b.m_value) }; } +CPPSPMD_FORCE_INLINE lint operator+(const lint& a, int b) { return lint{ _mm_add_epi32(a.m_value, _mm_set1_epi32(b)) }; } +CPPSPMD_FORCE_INLINE vfloat operator+(float a, const lint& b) { return vfloat(a) + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator+(const lint& a, float b) { return vfloat(a) + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(const lint& a, float b) { return vfloat(a) * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(float b, const lint& a) { return vfloat(a) * vfloat(b); } + +CPPSPMD_FORCE_INLINE 
vint operator&(const vint& a, const vint& b) { return vint{ _mm_and_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator&(const vint& a, int b) { return a & vint(b); } +CPPSPMD_FORCE_INLINE vint andnot(const vint& a, const vint& b) { return vint{ _mm_andnot_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator|(const vint& a, const vint& b) { return vint{ _mm_or_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator|(const vint& a, int b) { return a | vint(b); } +CPPSPMD_FORCE_INLINE vint operator^(const vint& a, const vint& b) { return vint{ _mm_xor_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator^(const vint& a, int b) { return a ^ vint(b); } +CPPSPMD_FORCE_INLINE vbool operator==(const vint& a, const vint& b) { return vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator!=(const vint& a, const vint& b) { return !vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<(const vint& a, const vint& b) { return vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const vint& a, const vint& b) { return !vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const vint& a, const vint& b) { return !vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>(const vint& a, const vint& b) { return vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator+(const vint& a, const vint& b) { return vint{ _mm_add_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator-(const vint& a, const vint& b) { return vint{ _mm_sub_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator+(const vint& a, int b) { return a + vint(b); } +CPPSPMD_FORCE_INLINE vint operator-(const vint& a, int b) { return a - vint(b); } +CPPSPMD_FORCE_INLINE vint operator+(int a, const vint& b) { return vint(a) + b; } 
+CPPSPMD_FORCE_INLINE vint operator-(int a, const vint& b) { return vint(a) - b; } +CPPSPMD_FORCE_INLINE vint operator*(const vint& a, const vint& b) { return vint{ mullo_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator*(const vint& a, int b) { return a * vint(b); } +CPPSPMD_FORCE_INLINE vint operator*(int a, const vint& b) { return vint(a) * b; } + +CPPSPMD_FORCE_INLINE vint mulhiu(const vint& a, const vint& b) { return vint{ mulhi_epu32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint operator-(const vint& v) { return vint{ _mm_sub_epi32(_mm_setzero_si128(), v.m_value) }; } + +CPPSPMD_FORCE_INLINE vint operator~(const vint& a) { return vint{ -a - 1 }; } + +// A few of these break the lane-based abstraction model. They are supported in SSE2, so it makes sense to support them and let the user figure it out. +CPPSPMD_FORCE_INLINE vint adds_epu8(const vint& a, const vint& b) { return vint{ _mm_adds_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epu8(const vint& a, const vint& b) { return vint{ _mm_subs_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint avg_epu8(const vint & a, const vint & b) { return vint{ _mm_avg_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint max_epu8(const vint& a, const vint& b) { return vint{ _mm_max_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min_epu8(const vint& a, const vint& b) { return vint{ _mm_min_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sad_epu8(const vint& a, const vint& b) { return vint{ _mm_sad_epu8(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint add_epi8(const vint& a, const vint& b) { return vint{ _mm_add_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epi8(const vint& a, const vint& b) { return vint{ _mm_adds_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sub_epi8(const vint& a, const vint& b) { return vint{ _mm_sub_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epi8(const vint& a, 
const vint& b) { return vint{ _mm_subs_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpeq_epi8(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epi8(const vint& a, const vint& b) { return vint{ _mm_cmpgt_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epi8(const vint& a, const vint& b) { return vint{ _mm_cmplt_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint unpacklo_epi8(const vint& a, const vint& b) { return vint{ _mm_unpacklo_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint unpackhi_epi8(const vint& a, const vint& b) { return vint{ _mm_unpackhi_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE int movemask_epi8(const vint& a) { return _mm_movemask_epi8(a.m_value); } +CPPSPMD_FORCE_INLINE int movemask_epi32(const vint& a) { return _mm_movemask_ps(_mm_castsi128_ps(a.m_value)); } + +CPPSPMD_FORCE_INLINE vint cmple_epu8(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi8(_mm_min_epu8(a.m_value, b.m_value), a.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpge_epu8(const vint& a, const vint& b) { return vint{ cmple_epu8(b, a) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epu8(const vint& a, const vint& b) { return vint{ _mm_andnot_si128(_mm_cmpeq_epi8(a.m_value, b.m_value), _mm_cmpeq_epi8(_mm_max_epu8(a.m_value, b.m_value), a.m_value)) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epu8(const vint& a, const vint& b) { return vint{ cmpgt_epu8(b, a) }; } +CPPSPMD_FORCE_INLINE vint absdiff_epu8(const vint& a, const vint& b) { return vint{ _mm_or_si128(_mm_subs_epu8(a.m_value, b.m_value), _mm_subs_epu8(b.m_value, a.m_value)) }; } + +CPPSPMD_FORCE_INLINE vint blendv_epi8(const vint& a, const vint& b, const vint &mask) { return vint{ blendv_epi8(a.m_value, b.m_value, _mm_cmplt_epi8(mask.m_value, _mm_setzero_si128())) }; } +CPPSPMD_FORCE_INLINE vint blendv_epi32(const vint& a, const vint& b, const vint &mask) { return vint{ blendv_epi32(a.m_value, b.m_value, 
mask.m_value) }; } + +CPPSPMD_FORCE_INLINE vint add_epi16(const vint& a, const vint& b) { return vint{ _mm_add_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epi16(const vint& a, const vint& b) { return vint{ _mm_adds_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epu16(const vint& a, const vint& b) { return vint{ _mm_adds_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint avg_epu16(const vint& a, const vint& b) { return vint{ _mm_avg_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sub_epi16(const vint& a, const vint& b) { return vint{ _mm_sub_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epi16(const vint& a, const vint& b) { return vint{ _mm_subs_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epu16(const vint& a, const vint& b) { return vint{ _mm_subs_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mullo_epi16(const vint& a, const vint& b) { return vint{ _mm_mullo_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mulhi_epi16(const vint& a, const vint& b) { return vint{ _mm_mulhi_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mulhi_epu16(const vint& a, const vint& b) { return vint{ _mm_mulhi_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min_epi16(const vint& a, const vint& b) { return vint{ _mm_min_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint max_epi16(const vint& a, const vint& b) { return vint{ _mm_max_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint madd_epi16(const vint& a, const vint& b) { return vint{ _mm_madd_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpeq_epi16(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epi16(const vint& a, const vint& b) { return vint{ _mm_cmpgt_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epi16(const vint& a, const vint& b) { return vint{ 
_mm_cmplt_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint packs_epi16(const vint& a, const vint& b) { return vint{ _mm_packs_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint packus_epi16(const vint& a, const vint& b) { return vint{ _mm_packus_epi16(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint uniform_shift_left_epi16(const vint& a, const vint& b) { return vint{ _mm_sll_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint uniform_arith_shift_right_epi16(const vint& a, const vint& b) { return vint{ _mm_sra_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint uniform_shift_right_epi16(const vint& a, const vint& b) { return vint{ _mm_srl_epi16(a.m_value, b.m_value) }; } + +#define VINT_SHIFT_LEFT_EPI16(a, b) vint(_mm_slli_epi16((a).m_value, b)) +#define VINT_SHIFT_RIGHT_EPI16(a, b) vint(_mm_srai_epi16((a).m_value, b)) +#define VUINT_SHIFT_RIGHT_EPI16(a, b) vint(_mm_srli_epi16((a).m_value, b)) + +CPPSPMD_FORCE_INLINE vint undefined_vint() { return vint{ _mm_undefined_si128() }; } +CPPSPMD_FORCE_INLINE vfloat undefined_vfloat() { return vfloat{ _mm_undefined_ps() }; } + +CPPSPMD_FORCE_INLINE vint zero_vint() { return vint{ _mm_setzero_si128() }; } +CPPSPMD_FORCE_INLINE vfloat zero_vfloat() { return vfloat{ _mm_setzero_ps() }; } + +CPPSPMD_FORCE_INLINE vint vint_lane_set(int v0, int v1, int v2, int v3) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set(float v0, float v1, float v2, float v3) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vint vint_lane_set_r(int v3, int v2, int v1, int v0) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set_r(float v3, float v2, float v1, float v0) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } +// control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int32's in each 128-bit lane. 
+#define VINT_LANE_SHUFFLE_EPI32(a, control) vint(_mm_shuffle_epi32((a).m_value, control)) +#define VFLOAT_LANE_SHUFFLE_PS(a, b, control) vfloat(_mm_shuffle_ps((a).m_value, (b).m_value, control)) + +// control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int16's in either the high or low 64-bit lane. +#define VINT_LANE_SHUFFLELO_EPI16(a, control) vint(_mm_shufflelo_epi16((a).m_value, control)) +#define VINT_LANE_SHUFFLEHI_EPI16(a, control) vint(_mm_shufflehi_epi16((a).m_value, control)) + +#define VINT_LANE_SHUFFLE_MASK(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6)) +#define VINT_LANE_SHUFFLE_MASK_R(d, c, b, a) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6)) + +#define VINT_LANE_SHIFT_LEFT_BYTES(a, l) vint(_mm_slli_si128((a).m_value, l)) +#define VINT_LANE_SHIFT_RIGHT_BYTES(a, l) vint(_mm_srli_si128((a).m_value, l)) + +// Unpack and interleave 8-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi8(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi8(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi8(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi8(a.m_value, b.m_value)); } + +// Unpack and interleave 16-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi16(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi16(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi16(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi16(a.m_value, b.m_value)); } + +// Unpack and interleave 32-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi32(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi32(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi32(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi32(a.m_value, b.m_value)); } + +// Unpack and interleave 64-bit integers from 
the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi64(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi64(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi64(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi64(a.m_value, b.m_value)); } + +CPPSPMD_FORCE_INLINE vint vint_set1_epi8(int8_t a) { return vint(_mm_set1_epi8(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi16(int16_t a) { return vint(_mm_set1_epi16(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi32(int32_t a) { return vint(_mm_set1_epi32(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi64(int64_t a) { return vint(_mm_set1_epi64x(a)); } + +CPPSPMD_FORCE_INLINE vint mul_epu32(const vint &a, const vint& b) { return vint(_mm_mul_epu32(a.m_value, b.m_value)); } + +CPPSPMD_FORCE_INLINE vint div_epi32(const vint &a, const vint& b) +{ + __m128d al = _mm_cvtepi32_pd(a.m_value); + __m128d ah = _mm_cvtepi32_pd(_mm_unpackhi_epi64(a.m_value, a.m_value)); + + __m128d bl = _mm_cvtepi32_pd(b.m_value); + __m128d bh = _mm_cvtepi32_pd(_mm_unpackhi_epi64(b.m_value, b.m_value)); + + __m128d rl = _mm_div_pd(al, bl); + __m128d rh = _mm_div_pd(ah, bh); + + __m128i rli = _mm_cvttpd_epi32(rl); + __m128i rhi = _mm_cvttpd_epi32(rh); + + return vint(_mm_unpacklo_epi64(rli, rhi)); +} + +CPPSPMD_FORCE_INLINE vint mod_epi32(const vint &a, const vint& b) +{ + vint aa = abs(a), ab = abs(b); + vint q = div_epi32(aa, ab); + vint r = aa - q * ab; + return spmd_ternaryi(a < 0, -r, r); +} + +CPPSPMD_FORCE_INLINE vint operator/ (const vint& a, const vint& b) +{ + return div_epi32(a, b); +} + +CPPSPMD_FORCE_INLINE vint operator/ (const vint& a, int b) +{ + return div_epi32(a, vint(b)); +} + +CPPSPMD_FORCE_INLINE vint operator% (const vint& a, const vint& b) +{ + return mod_epi32(a, b); +} + +CPPSPMD_FORCE_INLINE vint operator% (const vint& a, int b) +{ + return mod_epi32(a, vint(b)); +} + +CPPSPMD_FORCE_INLINE vint operator<< (const vint& a, const vint& b) +{ +#if 0 + 
CPPSPMD_ALIGN(32) int result[4]; + result[0] = extract_x(a.m_value) << extract_x(b.m_value); + result[1] = extract_y(a.m_value) << extract_y(b.m_value); + result[2] = extract_z(a.m_value) << extract_z(b.m_value); + result[3] = extract_w(a.m_value) << extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + int x = extract_x(a.m_value) << extract_x(b.m_value); + int y = extract_y(a.m_value) << extract_y(b.m_value); + int z = extract_z(a.m_value) << extract_z(b.m_value); + int w = extract_w(a.m_value) << extract_w(b.m_value); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + // What this does: shift left each b lane by 23 bits (to move the shift amount into the FP exponent position), then epi32 add to the integer rep of 1.0f, then cast that to float, then convert that to int to get fast 2^x. + return a * vint(cast_vint_to_vfloat(vint(_mm_slli_epi32(b.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f)))); +#endif +} + +// uniform shift left +CPPSPMD_FORCE_INLINE vint operator<< (const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_sll_epi32(a.m_value, bv) }; +} + +// uniform arithmetic shift right +CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_sra_epi32(a.m_value, bv) }; +} + +// uniform shift right +CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_srl_epi32(a.m_value, bv) }; +} + +CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, const vint& b) +{ +#if 0 + 
CPPSPMD_ALIGN(32) int result[4]; + result[0] = ((uint32_t)extract_x(a.m_value)) >> extract_x(b.m_value); + result[1] = ((uint32_t)extract_y(a.m_value)) >> extract_y(b.m_value); + result[2] = ((uint32_t)extract_z(a.m_value)) >> extract_z(b.m_value); + result[3] = ((uint32_t)extract_w(a.m_value)) >> extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + uint32_t x = ((uint32_t)extract_x(a.m_value)) >> ((uint32_t)extract_x(b.m_value)); + uint32_t y = ((uint32_t)extract_y(a.m_value)) >> ((uint32_t)extract_y(b.m_value)); + uint32_t z = ((uint32_t)extract_z(a.m_value)) >> ((uint32_t)extract_z(b.m_value)); + uint32_t w = ((uint32_t)extract_w(a.m_value)) >> ((uint32_t)extract_w(b.m_value)); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + //vint inv_shift = 32 - b; + //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); + + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. + vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); + + // Now convert scale factor to integer. + vint r = vint(f); + + // mulhi_epu32 (using two _mm_mul_epu32), to emulate varying shift left. + vint q(mulhi_epu32(a.m_value, r.m_value)); + + // Handle shift amounts of 0. + return spmd_ternaryi(b > 0, q, a); +#endif +} + +CPPSPMD_FORCE_INLINE vint vuint_shift_right_not_zero(const vint& a, const vint& b) +{ + //vint inv_shift = 32 - b; + //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); + + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. + vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); + + // Now convert scale factor to integer. 
+ vint r = vint(f); + + // mulhi_epu32 (using two _mm_mul_epu32), to emulate varying shift left. + return vint(mulhi_epu32(a.m_value, r.m_value)); +} + +CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, const vint& b) +{ +#if 0 + CPPSPMD_ALIGN(32) int result[4]; + result[0] = extract_x(a.m_value) >> extract_x(b.m_value); + result[1] = extract_y(a.m_value) >> extract_y(b.m_value); + result[2] = extract_z(a.m_value) >> extract_z(b.m_value); + result[3] = extract_w(a.m_value) >> extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + int x = extract_x(a.m_value) >> extract_x(b.m_value); + int y = extract_y(a.m_value) >> extract_y(b.m_value); + int z = extract_z(a.m_value) >> extract_z(b.m_value); + int w = extract_w(a.m_value) >> extract_w(b.m_value); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + vint sign_mask(_mm_cmplt_epi32(a.m_value, _mm_setzero_si128())); + vint a_shifted = vuint_shift_right(a ^ sign_mask, b) ^ sign_mask; + return a_shifted; +#endif +} + +#undef VINT_SHIFT_LEFT +#undef VINT_SHIFT_RIGHT +#undef VUINT_SHIFT_RIGHT + +// Shift left/right by a uniform immediate constant +#define VINT_SHIFT_LEFT(a, b) vint(_mm_slli_epi32( (a).m_value, (b) ) ) +#define VINT_SHIFT_RIGHT(a, b) vint( _mm_srai_epi32( (a).m_value, (b) ) ) +#define VUINT_SHIFT_RIGHT(a, b) vint( _mm_srli_epi32( (a).m_value, (b) ) ) +#define VINT_ROT(x, k) (VINT_SHIFT_LEFT((x), (k)) | VUINT_SHIFT_RIGHT((x), 32 - (k))) + +CPPSPMD_FORCE_INLINE vbool operator==(const lint& a, const lint& b) { return vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator==(const lint& a, int b) { return vint(a) == vint(b); } +CPPSPMD_FORCE_INLINE vbool operator==(int a, const lint& b) { return vint(a) == vint(b); } +CPPSPMD_FORCE_INLINE vbool operator<(const lint& a, const lint& b) { return vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE 
vbool operator>(const lint& a, const lint& b) { return vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const lint& a, const lint& b) { return !vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const lint& a, const lint& b) { return !vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } + +CPPSPMD_FORCE_INLINE float extract(const vfloat& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) float values[4]; _mm_store_ps(values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE int extract(const vint& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE int extract(const lint& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE bool extract(const vbool& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance] != 0; } + +#undef VINT_EXTRACT +#undef VBOOL_EXTRACT +#undef VFLOAT_EXTRACT + +#if CPPSPMD_SSE2 +// Pass in an immediate constant and the compiler will optimize these expressions. +#define VINT_EXTRACT(v, instance) ( ((instance) == 0) ? extract_x((v).m_value) : (((instance) == 1) ? extract_y((v).m_value) : (((instance) == 2) ? extract_z((v).m_value) : extract_w((v).m_value))) ) +#define VBOOL_EXTRACT(v, instance) ( ((instance) == 0) ? extract_x((v).m_value) : (((instance) == 1) ? extract_y((v).m_value) : (((instance) == 2) ? extract_z((v).m_value) : extract_w((v).m_value))) ) +#define VFLOAT_EXTRACT(v, instance) ( ((instance) == 0) ? extractf_ps_x((v).m_value) : (((instance) == 1) ? extractf_ps_y((v).m_value) : (((instance) == 2) ? 
extractf_ps_z((v).m_value) : extractf_ps_w((v).m_value))) ) +#else +CPPSPMD_FORCE_INLINE float cast_int_bits_as_float(int v) { return *(const float*)&v; } + +#define VINT_EXTRACT(v, instance) _mm_extract_epi32((v).m_value, instance) +#define VBOOL_EXTRACT(v, instance) _mm_extract_epi32((v).m_value, instance) +#define VFLOAT_EXTRACT(v, instance) cast_int_bits_as_float(_mm_extract_ps((v).m_value, instance)) +#endif + +CPPSPMD_FORCE_INLINE vfloat &insert(vfloat& v, int instance, float f) +{ + assert(instance < 4); + CPPSPMD_ALIGN(16) float values[4]; + _mm_store_ps(values, v.m_value); + values[instance] = f; + v.m_value = _mm_load_ps(values); + return v; +} + +CPPSPMD_FORCE_INLINE vint &insert(vint& v, int instance, int i) +{ + assert(instance < 4); + CPPSPMD_ALIGN(16) int values[4]; + _mm_store_si128((__m128i *)values, v.m_value); + values[instance] = i; + v.m_value = _mm_load_si128((__m128i *)values); + return v; +} + +CPPSPMD_FORCE_INLINE vint init_lookup4(const uint8_t pTab[16]) +{ + __m128i l = _mm_loadu_si128((const __m128i*)pTab); + return vint{ l }; +} + +CPPSPMD_FORCE_INLINE vint table_lookup4_8(const vint& a, const vint& table) +{ + return vint{ shuffle_epi8(table.m_value, a.m_value) }; +} + +CPPSPMD_FORCE_INLINE void init_lookup5(const uint8_t pTab[32], vint& table_0, vint& table_1) +{ + __m128i l = _mm_loadu_si128((const __m128i*)pTab); + __m128i h = _mm_loadu_si128((const __m128i*)(pTab + 16)); + table_0.m_value = l; + table_1.m_value = h; +} + +CPPSPMD_FORCE_INLINE vint table_lookup5_8(const vint& a, const vint& table_0, const vint& table_1) +{ + __m128i l_0 = shuffle_epi8(table_0.m_value, a.m_value); + __m128i h_0 = shuffle_epi8(table_1.m_value, a.m_value); + + __m128i m_0 = _mm_slli_epi32(a.m_value, 31 - 4); + + __m128 v_0 = blendv_ps(_mm_castsi128_ps(l_0), _mm_castsi128_ps(h_0), _mm_castsi128_ps(m_0)); + + return vint{ _mm_castps_si128(v_0) }; +} + +CPPSPMD_FORCE_INLINE void init_lookup6(const uint8_t pTab[64], vint& table_0, vint& table_1, vint& 
table_2, vint& table_3) +{ + __m128i a = _mm_loadu_si128((const __m128i*)pTab); + __m128i b = _mm_loadu_si128((const __m128i*)(pTab + 16)); + __m128i c = _mm_loadu_si128((const __m128i*)(pTab + 32)); + __m128i d = _mm_loadu_si128((const __m128i*)(pTab + 48)); + + table_0.m_value = a; + table_1.m_value = b; + table_2.m_value = c; + table_3.m_value = d; +} + +CPPSPMD_FORCE_INLINE vint table_lookup6_8(const vint& a, const vint& table_0, const vint& table_1, const vint& table_2, const vint& table_3) +{ + __m128i m_0 = _mm_slli_epi32(a.m_value, 31 - 4); + + __m128 av_0; + { + __m128i al_0 = shuffle_epi8(table_0.m_value, a.m_value); + __m128i ah_0 = shuffle_epi8(table_1.m_value, a.m_value); + av_0 = blendv_ps(_mm_castsi128_ps(al_0), _mm_castsi128_ps(ah_0), _mm_castsi128_ps(m_0)); + } + + __m128 bv_0; + { + __m128i bl_0 = shuffle_epi8(table_2.m_value, a.m_value); + __m128i bh_0 = shuffle_epi8(table_3.m_value, a.m_value); + bv_0 = blendv_ps(_mm_castsi128_ps(bl_0), _mm_castsi128_ps(bh_0), _mm_castsi128_ps(m_0)); + } + + __m128i m2_0 = _mm_slli_epi32(a.m_value, 31 - 5); + __m128 v2_0 = blendv_ps(av_0, bv_0, _mm_castsi128_ps(m2_0)); + + return vint{ _mm_castps_si128(v2_0) }; +} + +#if 0 +template +CPPSPMD_FORCE_INLINE decltype(auto) spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(exec_mask::all_on()); + return kernel._call(std::forward(args)...); +} +#else +template +CPPSPMD_FORCE_INLINE void spmd_call(Args&&... 
args) +{ + SPMDKernel kernel; + kernel.init(exec_mask::all_on()); + kernel._call(std::forward(args)...); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::init(const spmd_kernel::exec_mask& kernel_exec) +{ + m_exec = kernel_exec; + m_kernel_exec = kernel_exec; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + m_in_loop = false; +#endif +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store(const float_vref& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store_all(const float_vref& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store(const float_vref&& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store_all(const float_vref&& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + return dst; +} + +#include 
"cppspmd_flow.h" +#include "cppspmd_math.h" + +} // namespace cppspmd_sse41 diff --git a/Source/ThirdParty/basis_universal/encoder/cppspmd_type_aliases.h b/Source/ThirdParty/basis_universal/encoder/cppspmd_type_aliases.h new file mode 100644 index 000000000..260048123 --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/cppspmd_type_aliases.h @@ -0,0 +1,47 @@ +// cppspmd_type_aliases.h +// Do not include this file directly +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#ifndef CPPSPMD_TYPES +#define CPPSPMD_TYPES + +using exec_mask = CPPSPMD::exec_mask; + +#if CPPSPMD_INT16 +using vint16 = CPPSPMD::vint16; +using int16_lref = CPPSPMD::int16_lref; +using cint16_vref = CPPSPMD::cint16_vref; +using int16_vref = CPPSPMD::int16_vref; +using lint16 = CPPSPMD::lint16; +using vint16_vref = CPPSPMD::vint16_vref; +#else +using vint = CPPSPMD::vint; +using int_lref = CPPSPMD::int_lref; +using cint_vref = CPPSPMD::cint_vref; +using int_vref = CPPSPMD::int_vref; +using lint = CPPSPMD::lint; +using vint_vref = CPPSPMD::vint_vref; +#endif + +using vbool = CPPSPMD::vbool; +using vfloat = CPPSPMD::vfloat; +using float_lref = CPPSPMD::float_lref; +using float_vref = CPPSPMD::float_vref; +using vfloat_vref = CPPSPMD::vfloat_vref; + +#endif // CPPSPMD_TYPES diff --git a/Source/ThirdParty/basis_universal/encoder/jpgd.h b/Source/ThirdParty/basis_universal/encoder/jpgd.h new file mode 100644 index 000000000..bd1ad808c --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/jpgd.h @@ -0,0 +1,352 @@ +// jpgd.h - C++ class for JPEG decompression. +// Dual licensed: Public domain, Rich Geldreich , or Apache 2.0 (see jpgd.cpp) +#ifndef JPEG_DECODER_H +#define JPEG_DECODER_H + +#include +#include +#ifndef __wasi__ +#include +#endif +#include +#include + +#ifdef _MSC_VER +#define JPGD_NORETURN __declspec(noreturn) +#elif defined(__GNUC__) +#define JPGD_NORETURN __attribute__ ((noreturn)) +#else +#define JPGD_NORETURN +#endif + +#define JPGD_HUFF_TREE_MAX_LENGTH 512 +#define JPGD_HUFF_CODE_SIZE_MAX_LENGTH 256 + +namespace jpgd +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef signed int int32; + + // Loads a JPEG image from a memory buffer or a file. + // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). + // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB). 
+ // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly. + // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp. + unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + // Success/failure error codes. + enum jpgd_status + { + JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, + JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, + JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, + JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, + JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, + JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, + JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, + JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, + JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS + }; + + // Input stream interface. + // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available. + // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set. 
+ // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer. + // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding. + class jpeg_decoder_stream + { + public: + jpeg_decoder_stream() { } + virtual ~jpeg_decoder_stream() { } + + // The read() method is called when the internal input buffer is empty. + // Parameters: + // pBuf - input buffer + // max_bytes_to_read - maximum bytes that can be written to pBuf + // pEOF_flag - set this to true if at end of stream (no more bytes remaining) + // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0). + // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full. + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) = 0; + }; + + // stdio FILE stream class. + class jpeg_decoder_file_stream : public jpeg_decoder_stream + { + jpeg_decoder_file_stream(const jpeg_decoder_file_stream&); + jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&); + + FILE* m_pFile; + bool m_eof_flag, m_error_flag; + + public: + jpeg_decoder_file_stream(); + virtual ~jpeg_decoder_file_stream(); + + bool open(const char* Pfilename); + void close(); + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Memory stream class. 
+ class jpeg_decoder_mem_stream : public jpeg_decoder_stream + { + const uint8* m_pSrc_data; + uint m_ofs, m_size; + + public: + jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { } + jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { } + + virtual ~jpeg_decoder_mem_stream() { } + + bool open(const uint8* pSrc_data, uint size); + void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Loads JPEG file from a jpeg_decoder_stream. + unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + enum + { + JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, + JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768 + }; + + typedef int16 jpgd_quant_t; + typedef int16 jpgd_block_t; + + class jpeg_decoder + { + public: + enum + { + cFlagLinearChromaFiltering = 1 + }; + + // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc. + // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline. + jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = cFlagLinearChromaFiltering); + + ~jpeg_decoder(); + + // Call this method after constructing the object to begin decompression. + // If JPGD_SUCCESS is returned you may then call decode() on each scanline. + + int begin_decoding(); + + // Returns the next scan line. + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). 
+ // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). + // Returns JPGD_SUCCESS if a scan line has been returned. + // Returns JPGD_DONE if all scan lines have been returned. + // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info. + int decode(const void** pScan_line, uint* pScan_line_len); + + inline jpgd_status get_error_code() const { return m_error_code; } + + inline int get_width() const { return m_image_x_size; } + inline int get_height() const { return m_image_y_size; } + + inline int get_num_components() const { return m_comps_in_frame; } + + inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; } + inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); } + + // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file). + inline int get_total_bytes_read() const { return m_total_bytes_read; } + + private: + jpeg_decoder(const jpeg_decoder&); + jpeg_decoder& operator =(const jpeg_decoder&); + + typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int); + + struct huff_tables + { + bool ac_table; + uint look_up[256]; + uint look_up2[256]; + uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH]; + uint tree[JPGD_HUFF_TREE_MAX_LENGTH]; + }; + + struct coeff_buf + { + uint8* pData; + int block_num_x, block_num_y; + int block_len_x, block_len_y; + int block_size; + }; + + struct mem_block + { + mem_block* m_pNext; + size_t m_used_count; + size_t m_size; + char m_data[1]; + }; + + // TODO: we can get rid of longjmp entirely +#ifndef __wasi__ + jmp_buf m_jmp_state; +#endif + uint32_t m_flags; + mem_block* m_pMem_blocks; + int m_image_x_size; + int m_image_y_size; + jpeg_decoder_stream* m_pStream; + + int m_progressive_flag; + + uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES]; + uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of 
Huffman codes per bit size + uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size + jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables + int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported) + int m_comps_in_frame; // # of components in frame + int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor + int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor + int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector + int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID + int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; + int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; + int m_comps_in_scan; // # of components in scan + int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan + int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector + int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector + int m_spectral_start; // spectral selection start + int m_spectral_end; // spectral selection end + int m_successive_low; // successive approximation low + int m_successive_high; // successive approximation high + int m_max_mcu_x_size; // MCU's max. X size in pixels + int m_max_mcu_y_size; // MCU's max. 
Y size in pixels + int m_blocks_per_mcu; + int m_max_blocks_per_row; + int m_mcus_per_row, m_mcus_per_col; + int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; + int m_total_lines_left; // total # lines left in image + int m_mcu_lines_left; // total # lines left in this MCU + int m_num_buffered_scanlines; + int m_real_dest_bytes_per_scan_line; + int m_dest_bytes_per_scan_line; // rounded up + int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) + huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; + coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; + coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; + int m_eob_run; + int m_block_y_mcu[JPGD_MAX_COMPONENTS]; + uint8* m_pIn_buf_ofs; + int m_in_buf_left; + int m_tem_flag; + + uint8 m_in_buf_pad_start[64]; + uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128]; + uint8 m_in_buf_pad_end[64]; + + int m_bits_left; + uint m_bit_buf; + int m_restart_interval; + int m_restarts_left; + int m_next_restart_num; + int m_max_mcus_per_row; + int m_max_blocks_per_mcu; + + int m_max_mcus_per_col; + uint m_last_dc_val[JPGD_MAX_COMPONENTS]; + jpgd_block_t* m_pMCU_coefficients; + int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU]; + uint8* m_pSample_buf; + uint8* m_pSample_buf_prev; + int m_crr[256]; + int m_cbb[256]; + int m_crg[256]; + int m_cbg[256]; + uint8* m_pScan_line_0; + uint8* m_pScan_line_1; + jpgd_status m_error_code; + int m_total_bytes_read; + + bool m_ready_flag; + bool m_eof_flag; + bool m_sample_buf_prev_valid; + + inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; } + void free_all_blocks(); + JPGD_NORETURN void stop_decoding(jpgd_status status); + void* alloc(size_t n, bool zero = false); + void word_clear(void* p, uint16 c, uint n); + void prep_in_buffer(); + void read_dht_marker(); + void read_dqt_marker(); + void read_sof_marker(); + void skip_variable_marker(); + void read_dri_marker(); + void read_sos_marker(); + int next_marker(); + int process_markers(); + void locate_soi_marker(); + 
void locate_sof_marker(); + int locate_sos_marker(); + void init(jpeg_decoder_stream* pStream, uint32_t flags); + void create_look_ups(); + void fix_in_buffer(); + void transform_mcu(int mcu_row); + coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y); + inline jpgd_block_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y); + void load_next_row(); + void decode_next_row(); + void make_huff_table(int index, huff_tables* pH); + void check_quant_tables(); + void check_huff_tables(); + bool calc_mcu_block_order(); + int init_scan(); + void init_frame(); + void process_restart(); + void decode_scan(pDecode_block_func decode_block_func); + void init_progressive(); + void init_sequential(); + void decode_start(); + void decode_init(jpeg_decoder_stream* pStream, uint32_t flags); + void H2V2Convert(); + uint32_t H2V2ConvertFiltered(); + void H2V1Convert(); + void H2V1ConvertFiltered(); + void H1V2Convert(); + void H1V2ConvertFiltered(); + void H1V1Convert(); + void gray_convert(); + void find_eoi(); + inline uint get_char(); + inline uint get_char(bool* pPadding_flag); + inline void stuff_char(uint8 q); + inline uint8 get_octet(); + inline uint get_bits(int num_bits); + inline uint get_bits_no_markers(int numbits); + inline int huff_decode(huff_tables* pH); + inline int huff_decode(huff_tables* pH, int& extrabits); + + // Clamps a value between 0-255. 
+ static inline uint8 clamp(int i) + { + if (static_cast(i) > 255) + i = (((~i) >> 31) & 0xFF); + return static_cast(i); + } + int decode_next_mcu_row(); + + static void decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + }; + +} // namespace jpgd + +#endif // JPEG_DECODER_H diff --git a/Source/ThirdParty/basis_universal/encoder/pvpngreader.h b/Source/ThirdParty/basis_universal/encoder/pvpngreader.h new file mode 100644 index 000000000..b1850f1bb --- /dev/null +++ b/Source/ThirdParty/basis_universal/encoder/pvpngreader.h @@ -0,0 +1,48 @@ +// pngreader.h - Public Domain - see unlicense at bottom of pvpngreader.cpp +#pragma once +#include + +namespace pv_png +{ + // PNG color types + enum + { + PNG_COLOR_TYPE_GREYSCALE = 0, + PNG_COLOR_TYPE_TRUECOLOR = 2, + PNG_COLOR_TYPE_PALETTIZED = 3, + PNG_COLOR_TYPE_GREYSCALE_ALPHA = 4, + PNG_COLOR_TYPE_TRUECOLOR_ALPHA = 6 + }; + + // PNG file description + struct png_info + { + uint32_t m_width; + uint32_t m_height; + + uint32_t m_num_chans; // The number of channels, factoring in transparency. Ranges from [1-4]. + + uint32_t m_bit_depth; // PNG ihdr bit depth: 1, 2, 4, 8 or 16 + uint32_t m_color_type; // PNG ihdr color type, PNG_COLOR_TYPE_GRAYSCALE etc. + + bool m_has_gamma; // true if the PNG file had a GAMA chunk + uint32_t m_gamma_value; // PNG GAMA chunk value, scaled by 100000 + + bool m_has_trns; // true if the PNG file used colorkey transparency + }; + + // Retrieved information about the PNG file. + // Returns false on any errors. 
+ bool get_png_info(const void* pImage_buf, size_t buf_size, png_info& info); + + // Input parameters: + // pImage_buf, buf_size - pointer to PNG image data + // desired_chans - desired number of output channels. 0=auto, 1=grayscale, 2=grayscale alpha, 3=24bpp RGB, 4=32bpp RGBA + // + // Output parameters: + // width, height - PNG image resolution + // num_chans - actual number of channels in PNG, from [1,4] (factoring in transparency) + // + // Returns nullptr on any errors. + void* load_png(const void* pImage_buf, size_t buf_size, uint32_t desired_chans, uint32_t &width, uint32_t &height, uint32_t& num_chans); +} diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu.h b/Source/ThirdParty/basis_universal/transcoder/basisu.h new file mode 100644 index 000000000..6f7e25842 --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu.h @@ -0,0 +1,821 @@ +// basisu.h +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef BASISD_SUPPORT_XUASTC +#define BASISD_SUPPORT_XUASTC 1 +#endif + +#ifdef _MSC_VER + + #pragma warning (disable : 4201) + #pragma warning (disable : 4127) // warning C4127: conditional expression is constant + #pragma warning (disable : 4530) // C++ exception handler used, but unwind semantics are not enabled. 
+ +#endif // _MSC_VER + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "basisu_containers.h" + +// We never use min/max macros, slam them to off. +#ifdef max +#undef max +#endif + +#ifdef min +#undef min +#endif + +#ifdef _WIN32 +#define strcasecmp _stricmp +#endif + +// Set to one to enable debug printf()'s when any errors occur, for development/debugging. Especially useful for WebGL development. +#ifndef BASISU_FORCE_DEVEL_MESSAGES +// Do not check in as 1! +#define BASISU_FORCE_DEVEL_MESSAGES 0 +#endif + +#define BASISU_NOTE_UNUSED(x) (void)(x) +#define BASISU_ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#define BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(x) x(const x &) = delete; x& operator= (const x &) = delete; +#define BASISU_ASSUME(x) static_assert(x, #x); +#define BASISU_OFFSETOF(s, m) offsetof(s, m) +#define BASISU_STRINGIZE(x) #x +#define BASISU_STRINGIZE2(x) BASISU_STRINGIZE(x) + +#if BASISU_FORCE_DEVEL_MESSAGES + #define BASISU_DEVEL_ERROR(...) do { basisu::debug_printf(__VA_ARGS__); } while(0) +#else + #define BASISU_DEVEL_ERROR(...) +#endif + +namespace basisu +{ + // Types/utilities + +#ifdef _WIN32 + const char BASISU_PATH_SEPERATOR_CHAR = '\\'; +#else + const char BASISU_PATH_SEPERATOR_CHAR = '/'; +#endif + + typedef basisu::vector uint8_vec; + typedef basisu::vector int16_vec; + typedef basisu::vector uint16_vec; + typedef basisu::vector uint_vec; + typedef basisu::vector size_t_vec; + typedef basisu::vector uint64_vec; + typedef basisu::vector int_vec; + typedef basisu::vector bool_vec; + typedef basisu::vector float_vec; + typedef basisu::vector double_vec; + + void enable_debug_printf(bool enabled); + void debug_printf(const char *pFmt, ...); + void debug_puts(const char* p); + + template + inline void fmt_debug_printf(const char* pFmt, Args&&... 
args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... })) + return; + debug_puts(res.c_str()); + } + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + + template inline void clear_obj(T& obj) { memset((void *)&obj, 0, sizeof(obj)); } + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + + constexpr double cPiD = 3.14159265358979323846264338327950288; + constexpr float REALLY_SMALL_FLOAT_VAL = .000000125f; + constexpr float SMALL_FLOAT_VAL = .0000125f; + constexpr float BIG_FLOAT_VAL = 1e+30f; + + template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } + + inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + inline float saturate(float value) { return clampf(value, 0, 1.0f); } + inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; } + inline uint32_t minimumu(uint32_t a, uint32_t b) { return (a < b) ? a : b; } + inline int32_t minimumi(int32_t a, int32_t b) { return (a < b) ? a : b; } + inline float minimumf(float a, float b) { return (a < b) ? a : b; } + inline uint8_t maximumub(uint8_t a, uint8_t b) { return (a > b) ? a : b; } + inline uint32_t maximumu(uint32_t a, uint32_t b) { return (a > b) ? a : b; } + inline int32_t maximumi(int32_t a, int32_t b) { return (a > b) ? a : b; } + inline float maximumf(float a, float b) { return (a > b) ? a : b; } + inline int squarei(int i) { return i * i; } + inline float squaref(float i) { return i * i; } + inline double squared(double i) { return i * i; } + template inline T square(T a) { return a * a; } + template inline T sign(T a) { return (a < 0) ? (T)-1 : ((a == 0) ? (T)0 : (T)1); } + + inline int imod(int i, int d) + { + assert(i != INT_MIN); + + if (i >= 0) + return i % d; + + int r = (-i) % d; + return (r == 0) ? 
0 : d - r; + } + + inline uint8_t safe_cast_uint8(uint32_t x) + { + assert(x <= UINT8_MAX); + return (uint8_t)x; + } + + inline int8_t safe_cast_int8(int32_t x) + { + assert((x >= INT8_MIN) && (x <= INT8_MAX)); + return (int8_t)x; + } + + inline uint16_t safe_cast_uint16(uint32_t x) + { + assert(x <= UINT16_MAX); + return (uint16_t)x; + } + + inline int16_t safe_cast_int16(int32_t x) + { + assert((x >= INT16_MIN) && (x <= INT16_MAX)); + return (int16_t)x; + } + + inline bool equal_tol(float a, float b, float t) { return fabsf(a - b) <= ((maximum(fabsf(a), fabsf(b)) + 1.0f) * t); } + inline bool equal_tol(double a, double b, double t) { return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t); } + + template + inline T prev_wrap(T i, T n) + { + T temp = i - 1; + if (temp < 0) + temp = n - 1; + return temp; + } + + template + inline T next_wrap(T i, T n) + { + T temp = i + 1; + if (temp >= n) + temp = 0; + return temp; + } + + inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } + inline uint64_t iabs64(int64_t i) { return (i < 0) ? 
static_cast(-i) : static_cast(i); } + + template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } + template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } + + inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_pow2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } + + template inline T range_check(T v, T minv, T maxv) { assert(v >= minv && v <= maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } + template inline T range_check(T v, T maxv) { assert(v <= maxv); BASISU_NOTE_UNUSED(maxv); return v; } + + template inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } + template inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; } + + // Open interval + inline bool is_in_bounds(int v, int l, int h) + { + return (v >= l) && (v < h); + } + + // Closed interval + inline bool is_in_range(int v, int l, int h) + { + return (v >= l) && (v <= h); + } + + inline bool is_in_range(float v, float l, float h) + { + return (v >= l) && (v <= h); + } + + inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; } + + template inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); } + + inline uint32_t get_bit(uint32_t src, int ndx) + { + assert(is_in_bounds(ndx, 0, 32)); + return (src >> ndx) & 1; + } + + inline bool is_bit_set(uint32_t src, int ndx) + { + return get_bit(src, ndx) != 0; + } + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert(is_in_range(num_bits, 1, 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + + template inline void append_vector(T &vec, const R *pObjs, size_t n) + { + if (n) + { + if (vec.size()) + { + assert((pObjs 
+ n) <= vec.begin() || (pObjs >= vec.end())); + } + const size_t cur_s = vec.size(); + vec.resize(cur_s + n); + memcpy(&vec[cur_s], pObjs, sizeof(R) * n); + } + } + + template inline void append_vector(T &vec, const T &other_vec) + { + assert(&vec != &other_vec); + if (other_vec.size()) + append_vector(vec, &other_vec[0], other_vec.size()); + } + + template inline void vector_ensure_element_is_valid(T &vec, size_t idx) + { + if (idx >= vec.size()) + vec.resize(idx + 1); + } + + template inline void vector_sort(T &vec) + { + if (vec.size()) + std::sort(vec.begin(), vec.end()); + } + + template inline bool unordered_set_contains(T& set, const U&obj) + { + return set.find(obj) != set.end(); + } + + template int vector_find(const T &vec, const typename T::value_type &obj) + { + assert(vec.size() <= INT_MAX); + for (size_t i = 0; i < vec.size(); i++) + if (vec[i] == obj) + return static_cast(i); + return -1; + } + + template void vector_set_all(T &vec, const typename T::value_type &obj) + { + for (size_t i = 0; i < vec.size(); i++) + vec[i] = obj; + } + + inline uint64_t read_be64(const void *p) + { + uint64_t val = 0; + for (uint32_t i = 0; i < 8; i++) + val |= (static_cast(static_cast(p)[7 - i]) << (i * 8)); + return val; + } + + inline void write_be64(void *p, uint64_t x) + { + for (uint32_t i = 0; i < 8; i++) + static_cast(p)[7 - i] = static_cast(x >> (i * 8)); + } + + static inline uint16_t byteswap16(uint16_t x) { return static_cast((x << 8) | (x >> 8)); } + static inline uint32_t byteswap32(uint32_t x) { return ((x << 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | (x >> 24)); } + + inline uint32_t floor_log2i(uint32_t v) + { + uint32_t b = 0; + for (; v > 1U; ++b) + v >>= 1; + return b; + } + + inline uint32_t ceil_log2i(uint32_t v) + { + uint32_t b = floor_log2i(v); + if ((b != 32) && (v > (1U << b))) + ++b; + return b; + } + + inline int posmod(int x, int y) + { + if (x >= 0) + return (x < y) ? x : (x % y); + int m = (-x) % y; + return (m != 0) ? 
(y - m) : m; + } + + inline float posmodf(float x, float y) + { + float m = fmodf(x, y); + if (m < 0.0f) + m += y; + return m; + } + + inline bool do_excl_ranges_overlap(int la, int ha, int lb, int hb) + { + assert(la < ha && lb < hb); + if ((ha <= lb) || (la >= hb)) return false; + return true; + } + + static inline uint32_t read_le_word(const uint8_t* pBytes) + { + return (pBytes[1] << 8U) | (pBytes[0]); + } + + static inline uint32_t read_le_dword(const uint8_t *pBytes) + { + return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]); + } + + static inline void write_le_dword(uint8_t* pBytes, uint32_t val) + { + pBytes[0] = (uint8_t)val; + pBytes[1] = (uint8_t)(val >> 8U); + pBytes[2] = (uint8_t)(val >> 16U); + pBytes[3] = (uint8_t)(val >> 24U); + } + + // Always little endian 1-8 byte unsigned int + template + struct packed_uint + { + uint8_t m_bytes[NumBytes]; + + inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } + inline packed_uint(uint64_t v) { *this = v; } + inline packed_uint(const packed_uint& other) { *this = other; } + + inline packed_uint& operator= (uint64_t v) + { + // TODO: Add assert on truncation? + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast(v >> (i * 8)); + return *this; + } + + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + return *this; + } + + inline uint64_t get_uint64() const + { + // Some compilers may warn about this code. It clearly cannot access beyond the end of the m_bytes struct here. 
+ if IF_CONSTEXPR (NumBytes == 1) + { + return m_bytes[0]; + } + else if IF_CONSTEXPR (NumBytes == 2) + { + return (m_bytes[1] << 8U) | m_bytes[0]; + } + else if IF_CONSTEXPR (NumBytes == 3) + { + return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0]; + } + else if IF_CONSTEXPR (NumBytes == 4) + { + return read_le_dword(m_bytes); + } + else if IF_CONSTEXPR (NumBytes == 5) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + else if IF_CONSTEXPR (NumBytes == 6) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + else if IF_CONSTEXPR (NumBytes == 7) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + else if IF_CONSTEXPR (NumBytes == 8) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = read_le_dword(m_bytes + 4); + return static_cast(l) | (static_cast(h) << 32U); + } + else + { + static_assert(NumBytes <= 8, "Invalid NumBytes"); + return 0; + } + } + + inline uint32_t get_uint32() const + { + static_assert(NumBytes <= sizeof(uint32_t), "packed_uint too large to use get_uint32"); + return static_cast(get_uint64()); + } + + inline operator uint32_t() const + { + static_assert(NumBytes <= sizeof(uint32_t), "packed_uint too large to use operator uint32_t"); + return static_cast(get_uint64()); + } + }; + + enum eZero { cZero }; + enum eNoClamp { cNoClamp }; + + // Rice/Huffman entropy coding + + // This is basically Deflate-style canonical Huffman, except we allow for a lot more symbols. 
+ enum + { + cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, + cHuffmanFastLookupBits = 10, + cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, + + // Small zero runs + cHuffmanSmallZeroRunSizeMin = 3, cHuffmanSmallZeroRunSizeMax = 10, cHuffmanSmallZeroRunExtraBits = 3, + + // Big zero run + cHuffmanBigZeroRunSizeMin = 11, cHuffmanBigZeroRunSizeMax = 138, cHuffmanBigZeroRunExtraBits = 7, + + // Small non-zero run + cHuffmanSmallRepeatSizeMin = 3, cHuffmanSmallRepeatSizeMax = 6, cHuffmanSmallRepeatExtraBits = 2, + + // Big non-zero run + cHuffmanBigRepeatSizeMin = 7, cHuffmanBigRepeatSizeMax = 134, cHuffmanBigRepeatExtraBits = 7, + + cHuffmanTotalCodelengthCodes = 21, cHuffmanSmallZeroRunCode = 17, cHuffmanBigZeroRunCode = 18, cHuffmanSmallRepeatCode = 19, cHuffmanBigRepeatCode = 20 + }; + + static const uint8_t g_huffman_sorted_codelength_codes[] = { cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode, cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, 0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 }; + const uint32_t cHuffmanTotalSortedCodelengthCodes = sizeof(g_huffman_sorted_codelength_codes) / sizeof(g_huffman_sorted_codelength_codes[0]); + + // GPU texture formats and various uncompressed texture formats. 
+ + enum class texture_format + { + cInvalidTextureFormat = -1, + + // Block-based formats + cETC1, // ETC1 + cETC1S, // ETC1 (subset: diff colors only, no subblocks) + cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) + cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block + cETC2_ALPHA, // ETC2 EAC alpha block + cBC1, // DXT1 + cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) + cBC4, // DXT5A + cBC5, // 3DC/DXN (two BC4/DXT5A blocks) + cBC6HSigned, // HDR + cBC6HUnsigned, // HDR + cBC7, + cASTC_LDR_4x4, // ASTC 4x4 LDR only + cASTC_HDR_4x4, // ASTC 4x4 HDR only (but may use LDR ASTC blocks internally, although our encoders don't do this) + cASTC_HDR_6x6, // ASTC 6x6 HDR only (but may use LDR ASTC blocks internally, although our encoders don't do this) + cPVRTC1_4_RGB, + cPVRTC1_4_RGBA, + cATC_RGB, + cATC_RGBA_INTERPOLATED_ALPHA, + cFXT1_RGB, + cPVRTC2_4_RGBA, + cETC2_R11_EAC, + cETC2_RG11_EAC, + cUASTC4x4, + cUASTC_HDR_4x4, + cBC1_NV, + cBC1_AMD, + + // Uncompressed/raw pixels + cRGBA32, + cRGB565, + cBGR565, + cRGBA4444, + cABGR4444, + cRGBA_HALF, + cRGB_HALF, + cRGB_9E5, + + // All remaining ASTC LDR block size variants (other than 4x4 which is above). There are 14 total ASTC block sizes, including 4x4. 
+ cASTC_LDR_5x4, + cASTC_LDR_5x5, + cASTC_LDR_6x5, + cASTC_LDR_6x6, + cASTC_LDR_8x5, + cASTC_LDR_8x6, + cASTC_LDR_10x5, + cASTC_LDR_10x6, + cASTC_LDR_8x8, + cASTC_LDR_10x8, + cASTC_LDR_10x10, + cASTC_LDR_12x10, + cASTC_LDR_12x12 + }; + + inline bool is_astc(texture_format fmt) + { + switch (fmt) + { + case texture_format::cASTC_HDR_4x4: + case texture_format::cASTC_HDR_6x6: + case texture_format::cASTC_LDR_4x4: + case texture_format::cASTC_LDR_5x4: + case texture_format::cASTC_LDR_5x5: + case texture_format::cASTC_LDR_6x5: + case texture_format::cASTC_LDR_6x6: + case texture_format::cASTC_LDR_8x5: + case texture_format::cASTC_LDR_8x6: + case texture_format::cASTC_LDR_10x5: + case texture_format::cASTC_LDR_10x6: + case texture_format::cASTC_LDR_8x8: + case texture_format::cASTC_LDR_10x8: + case texture_format::cASTC_LDR_10x10: + case texture_format::cASTC_LDR_12x10: + case texture_format::cASTC_LDR_12x12: + return true; + default: + break; + } + return false; + } + + inline bool is_hdr_astc(texture_format fmt) + { + switch (fmt) + { + case texture_format::cASTC_HDR_4x4: + case texture_format::cASTC_HDR_6x6: + return true; + default: + break; + } + return false; + } + + inline bool is_ldr_astc(texture_format fmt) + { + switch (fmt) + { + case texture_format::cASTC_LDR_4x4: + case texture_format::cASTC_LDR_5x4: + case texture_format::cASTC_LDR_5x5: + case texture_format::cASTC_LDR_6x5: + case texture_format::cASTC_LDR_6x6: + case texture_format::cASTC_LDR_8x5: + case texture_format::cASTC_LDR_8x6: + case texture_format::cASTC_LDR_10x5: + case texture_format::cASTC_LDR_10x6: + case texture_format::cASTC_LDR_8x8: + case texture_format::cASTC_LDR_10x8: + case texture_format::cASTC_LDR_10x10: + case texture_format::cASTC_LDR_12x10: + case texture_format::cASTC_LDR_12x12: + return true; + default: + break; + } + return false; + } + + inline bool is_uncompressed_texture_format(texture_format fmt) + { + switch (fmt) + { + case texture_format::cRGBA32: + case 
texture_format::cRGB565: + case texture_format::cBGR565: + case texture_format::cRGBA4444: + case texture_format::cABGR4444: + case texture_format::cRGBA_HALF: + case texture_format::cRGB_HALF: + case texture_format::cRGB_9E5: + return true; + default: + break; + } + + return false; + } + + inline bool is_block_based_texture_format(texture_format fmt) + { + return !is_uncompressed_texture_format(fmt); + } + + // This is bytes per block for GPU formats, or bytes per texel for uncompressed formats. + inline uint32_t get_bytes_per_block_or_pixel(texture_format fmt) + { + switch (fmt) + { + case texture_format::cETC1: + case texture_format::cETC1S: + case texture_format::cETC2_RGB: + case texture_format::cETC2_ALPHA: + case texture_format::cBC1: + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: + case texture_format::cBC4: + case texture_format::cPVRTC1_4_RGB: + case texture_format::cPVRTC1_4_RGBA: + case texture_format::cATC_RGB: + case texture_format::cPVRTC2_4_RGBA: + case texture_format::cETC2_R11_EAC: + return 8; + case texture_format::cRGBA32: + case texture_format::cRGB_9E5: + return sizeof(uint32_t); + case texture_format::cRGB_HALF: + return sizeof(uint16_t) * 3; + case texture_format::cRGBA_HALF: + return sizeof(uint16_t) * 4; + case texture_format::cRGB565: + case texture_format::cBGR565: + case texture_format::cRGBA4444: + case texture_format::cABGR4444: + return sizeof(uint16_t); + + default: + break; + } + + // Everything else is 16 bytes/block. + return 16; + } + + // This is qwords per block for GPU formats, or not valid for uncompressed formats. 
+ inline uint32_t get_qwords_per_block(texture_format fmt) + { + assert(is_block_based_texture_format(fmt)); + + const uint32_t bytes_per_block = get_bytes_per_block_or_pixel(fmt); + return bytes_per_block >> 3; + } + + inline uint32_t get_block_width(texture_format fmt) + { + assert(is_block_based_texture_format(fmt)); + + switch (fmt) + { + case texture_format::cFXT1_RGB: return 8; + case texture_format::cASTC_HDR_6x6: return 6; + case texture_format::cASTC_LDR_5x4: return 5; + case texture_format::cASTC_LDR_5x5: return 5; + case texture_format::cASTC_LDR_6x5: return 6; + case texture_format::cASTC_LDR_6x6: return 6; + case texture_format::cASTC_LDR_8x5: return 8; + case texture_format::cASTC_LDR_8x6: return 8; + case texture_format::cASTC_LDR_10x5: return 10; + case texture_format::cASTC_LDR_10x6: return 10; + case texture_format::cASTC_LDR_8x8: return 8; + case texture_format::cASTC_LDR_10x8: return 10; + case texture_format::cASTC_LDR_10x10: return 10; + case texture_format::cASTC_LDR_12x10: return 12; + case texture_format::cASTC_LDR_12x12: return 12; + default: + break; + } + return 4; + } + + inline uint32_t get_block_height(texture_format fmt) + { + assert(is_block_based_texture_format(fmt)); + + switch (fmt) + { + case texture_format::cASTC_HDR_6x6: return 6; + case texture_format::cASTC_LDR_5x5: return 5; + case texture_format::cASTC_LDR_6x5: return 5; + case texture_format::cASTC_LDR_6x6: return 6; + case texture_format::cASTC_LDR_8x5: return 5; + case texture_format::cASTC_LDR_8x6: return 6; + case texture_format::cASTC_LDR_10x5: return 5; + case texture_format::cASTC_LDR_10x6: return 6; + case texture_format::cASTC_LDR_8x8: return 8; + case texture_format::cASTC_LDR_10x8: return 8; + case texture_format::cASTC_LDR_10x10: return 10; + case texture_format::cASTC_LDR_12x10: return 10; + case texture_format::cASTC_LDR_12x12: return 12; + default: + break; + } + return 4; + } + + inline bool is_hdr_texture_format(texture_format fmt) + { + switch (fmt) + { 
+ case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: + case texture_format::cASTC_HDR_6x6: + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + case texture_format::cRGBA_HALF: + case texture_format::cRGB_HALF: + case texture_format::cRGB_9E5: + return true; + default: + break; + } + + return false; + } + + inline bool is_ldr_texture_format(texture_format fmt) + { + return !is_hdr_texture_format(fmt); + } + + inline texture_format get_astc_ldr_texture_format(uint32_t width, uint32_t height) + { +#define BU_ASTC_LDR_MATCH_BLOCK_DIM(x, y, f) if ((width == (x)) && (height == (y))) return (f); + BU_ASTC_LDR_MATCH_BLOCK_DIM(4, 4, texture_format::cASTC_LDR_4x4); + BU_ASTC_LDR_MATCH_BLOCK_DIM(5, 4, texture_format::cASTC_LDR_5x4); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(5, 5, texture_format::cASTC_LDR_5x5); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(6, 5, texture_format::cASTC_LDR_6x5); + BU_ASTC_LDR_MATCH_BLOCK_DIM(6, 6, texture_format::cASTC_LDR_6x6); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(8, 5, texture_format::cASTC_LDR_8x5); + BU_ASTC_LDR_MATCH_BLOCK_DIM(8, 6, texture_format::cASTC_LDR_8x6); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 5, texture_format::cASTC_LDR_10x5); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 6, texture_format::cASTC_LDR_10x6); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(8, 8, texture_format::cASTC_LDR_8x8); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 8, texture_format::cASTC_LDR_10x8); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 10, texture_format::cASTC_LDR_10x10); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(12, 10, texture_format::cASTC_LDR_12x10); + BU_ASTC_LDR_MATCH_BLOCK_DIM(12, 12, texture_format::cASTC_LDR_12x12); +#undef BU_ASTC_LDR_MATCH_BLOCK_DIM + + return texture_format::cInvalidTextureFormat; + } + + inline bool is_valid_astc_block_size(uint32_t width, uint32_t height) + { + return get_astc_ldr_texture_format(width, height) != texture_format::cInvalidTextureFormat; + } + +} // namespace basisu + diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_astc_hdr_core.h 
b/Source/ThirdParty/basis_universal/transcoder/basisu_astc_hdr_core.h new file mode 100644 index 000000000..143e29e95 --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_astc_hdr_core.h @@ -0,0 +1,208 @@ +// File: basisu_astc_hdr_core.h +#pragma once + +namespace basist +{ + struct astc_blk + { + uint8_t m_vals[16]; + }; + + // ASTC_HDR_MAX_VAL is the maximum color component value that can be encoded. + // If the input has values higher than this, they need to be linearly scaled so all values are between [0,ASTC_HDR_MAX_VAL], and the linear scaling inverted in the shader. + const float ASTC_HDR_MAX_VAL = 65216.0f; // actually MAX_QLOG12_VAL + + // Maximum usable QLOG encodings, and their floating point equivalent values, that don't result in NaN/Inf's. + const uint32_t MAX_QLOG7 = 123; + //const float MAX_QLOG7_VAL = 55296.0f; + + const uint32_t MAX_QLOG8 = 247; + //const float MAX_QLOG8_VAL = 60416.0f; + + const uint32_t MAX_QLOG9 = 495; + //const float MAX_QLOG9_VAL = 62976.0f; + + const uint32_t MAX_QLOG10 = 991; + //const float MAX_QLOG10_VAL = 64256.0f; + + const uint32_t MAX_QLOG11 = 1983; + //const float MAX_QLOG11_VAL = 64896.0f; + + const uint32_t MAX_QLOG12 = 3967; + //const float MAX_QLOG12_VAL = 65216.0f; + + const uint32_t MAX_QLOG16 = 63487; + const float MAX_QLOG16_VAL = 65504.0f; + + // TODO: Should be called something like "NUM_MODE11_ENDPOINT_VALUES" + const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; + + // This is not lossless + inline half_float qlog_to_half(uint32_t qlog, uint32_t bits) + { + assert((bits >= 7U) && (bits <= 16U)); + assert(qlog < (1U << bits)); + + int C = qlog << (16 - bits); + return astc_helpers::qlog16_to_half(C); + } + + void astc_hdr_core_init(); + + void decode_mode7_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale); + + bool decode_mode7_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale, + uint32_t ise_endpoint_range); + + void 
decode_mode11_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3]); + + bool decode_mode11_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + uint32_t ise_endpoint_range); + + bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk); + bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk); + + bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk); + bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk); + + namespace astc_6x6_hdr + { + const uint32_t MAX_ASTC_HDR_6X6_DIM = 32768; + const int32_t REUSE_MAX_BUFFER_ROWS = 5; // 1+-(-4), so we need to buffer 5 rows total + + struct block_mode_desc + { + bool m_dp; + uint32_t m_cem; + uint32_t m_num_partitions; + uint32_t m_grid_x; + uint32_t m_grid_y; + + // the coding ISE ranges (which may not be valid ASTC ranges for this configuration) + uint32_t m_endpoint_ise_range; + uint32_t m_weight_ise_range; + + // the physical/output ASTC decompression ISE ranges (i.e. 
what the decompressor must output) + uint32_t m_transcode_endpoint_ise_range; + uint32_t m_transcode_weight_ise_range; + + uint32_t m_flags; + int m_dp_channel; + }; + + // Lack of level flag indicates level 3+ + const uint32_t BASIST_HDR_6X6_LEVEL0 = 1; + const uint32_t BASIST_HDR_6X6_LEVEL1 = 2; + const uint32_t BASIST_HDR_6X6_LEVEL2 = 4; + + const uint32_t TOTAL_BLOCK_MODE_DECS = 75; + extern const block_mode_desc g_block_mode_descs[TOTAL_BLOCK_MODE_DECS]; + + const uint32_t UASTC_6x6_HDR_SIG0 = 0xABCD; // original release + const uint32_t UASTC_6x6_HDR_SIG1 = 0xABCE; // 2x2->4x4 weight grid upsampling change, not backwards compatible with older decoders + + void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk, bool orig_behavior); + + enum class encoding_type + { + cInvalid = -1, + cRun = 0, + cSolid = 1, + cReuse = 2, + cBlock = 3, + cTotal + }; + + const uint32_t REUSE_XY_DELTA_BITS = 5; + const uint32_t NUM_REUSE_XY_DELTAS = 1 << REUSE_XY_DELTA_BITS; + + struct reuse_xy_delta + { + int8_t m_x, m_y; + }; + + extern const reuse_xy_delta g_reuse_xy_deltas[NUM_REUSE_XY_DELTAS]; + + const uint32_t RUN_CODE = 0b000, RUN_CODE_LEN = 3; + const uint32_t SOLID_CODE = 0b100, SOLID_CODE_LEN = 3; + const uint32_t REUSE_CODE = 0b10, REUSE_CODE_LEN = 2; + const uint32_t BLOCK_CODE = 0b1, BLOCK_CODE_LEN = 1; + + enum class endpoint_mode + { + cInvalid = -1, + + cRaw = 0, + cUseLeft, + cUseUpper, + cUseLeftDelta, + cUseUpperDelta, + + cTotal + }; + + enum class block_mode + { + cInvalid = -1, + + cBMTotalModes = TOTAL_BLOCK_MODE_DECS + }; + + const uint32_t NUM_ENDPOINT_DELTA_BITS = 5; + + const uint32_t NUM_UNIQUE_PARTITIONS2 = 521; + extern const uint32_t g_part2_unique_index_to_seed[NUM_UNIQUE_PARTITIONS2]; + + const uint32_t NUM_UNIQUE_PARTITIONS3 = 333; + extern const uint32_t g_part3_unique_index_to_seed[NUM_UNIQUE_PARTITIONS3]; + + bool decode_values(basist::bitwise_decoder& 
decoder, uint32_t total_values, uint32_t ise_range, uint8_t* pValues); + + void requantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_ise_vals, uint32_t to_ise_range); + + void requantize_ise_endpoints(uint32_t cem, uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints); + + const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_2 = 2; + const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_4 = 4; + const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_9 = 9; + + struct fast_bc6h_params + { + uint32_t m_num_diff_endpoint_modes_to_try; + uint32_t m_max_2subset_pats_to_try; + + bool m_hq_ls; + bool m_brute_force_weight4_assignment; + + fast_bc6h_params() + { + init(); + } + + void init() + { + m_hq_ls = true; + m_num_diff_endpoint_modes_to_try = BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_2; + m_max_2subset_pats_to_try = 1; + m_brute_force_weight4_assignment = false; + } + }; + + void fast_encode_bc6h(const basist::half_float* pPixels, basist::bc6h_block* pBlock, const fast_bc6h_params ¶ms); + + bool decode_6x6_hdr(const uint8_t* pComp_data, uint32_t comp_data_size, basisu::vector2D& decoded_blocks, uint32_t& width, uint32_t& height); + + } // namespace astc_6x6_hdr + +} // namespace basist + diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_astc_helpers.h b/Source/ThirdParty/basis_universal/transcoder/basisu_astc_helpers.h new file mode 100644 index 000000000..32b16087a --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_astc_helpers.h @@ -0,0 +1,4849 @@ +// basisu_astc_helpers.h +// Be sure to define ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header. 
+#ifndef BASISU_ASTC_HELPERS_HEADER +#define BASISU_ASTC_HELPERS_HEADER + +#include +#include +#include +#include + +namespace astc_helpers +{ + const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid + const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels + const uint32_t MAX_BLOCK_PIXELS = MAX_BLOCK_DIM * MAX_BLOCK_DIM; + const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values + const uint32_t MAX_CEM_ENDPOINT_VALS = 8; // see Table 94. ASTC LDR/HDR color endpoint modes (max 8 values to encode any CEM, minimum 2) + + // The number of BISE values needed to encode endpoints for each CEM. + const uint32_t NUM_MODE0_ENDPOINTS = 2, NUM_MODE4_ENDPOINTS = 4; + const uint32_t NUM_MODE6_ENDPOINTS = 4, NUM_MODE8_ENDPOINTS = 6, NUM_MODE9_ENDPOINTS = 6; // LDR RGB + const uint32_t NUM_MODE10_ENDPOINTS = 6, NUM_MODE12_ENDPOINTS = 8, NUM_MODE13_ENDPOINTS = 8; // LDR RGBA + const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; // hdr + + const uint32_t MAX_WEIGHTS = 32; // max supported # of weights (or "selectors") in any mode, i.e. the max # of colors per endpoint pair + const uint32_t MAX_WEIGHT_INTERPOLANT_VALUE = 64; // grid texel weights must range from [0,64], i.e. 
the weight interpolant range is [0,64] + + // 14 unique block dimensions supported by ASTC + static const uint32_t NUM_ASTC_BLOCK_SIZES = 14; + extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2]; + + // The Color Endpoint Modes (CEM's) + enum cems + { + CEM_LDR_LUM_DIRECT = 0, + CEM_LDR_LUM_BASE_PLUS_OFS = 1, + CEM_HDR_LUM_LARGE_RANGE = 2, + CEM_HDR_LUM_SMALL_RANGE = 3, + CEM_LDR_LUM_ALPHA_DIRECT = 4, + CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5, + CEM_LDR_RGB_BASE_SCALE = 6, + CEM_HDR_RGB_BASE_SCALE = 7, + CEM_LDR_RGB_DIRECT = 8, + CEM_LDR_RGB_BASE_PLUS_OFFSET = 9, + CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10, + CEM_HDR_RGB = 11, + CEM_LDR_RGBA_DIRECT = 12, + CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13, + CEM_HDR_RGB_LDR_ALPHA = 14, + CEM_HDR_RGB_HDR_ALPHA = 15 + }; + + // All Bounded Integer Sequence Coding (BISE or ISE) ranges. + // Weights: Ranges [0,11] are valid. + // Endpoints: Ranges [4,20] are valid. + enum bise_levels + { + BISE_2_LEVELS = 0, + BISE_3_LEVELS = 1, + BISE_4_LEVELS = 2, + BISE_5_LEVELS = 3, + BISE_6_LEVELS = 4, + BISE_8_LEVELS = 5, + BISE_10_LEVELS = 6, + BISE_12_LEVELS = 7, + BISE_16_LEVELS = 8, + BISE_20_LEVELS = 9, + BISE_24_LEVELS = 10, + BISE_32_LEVELS = 11, + BISE_40_LEVELS = 12, + BISE_48_LEVELS = 13, + BISE_64_LEVELS = 14, + BISE_80_LEVELS = 15, + BISE_96_LEVELS = 16, + BISE_128_LEVELS = 17, + BISE_160_LEVELS = 18, + BISE_192_LEVELS = 19, + BISE_256_LEVELS = 20 + }; + + const uint32_t TOTAL_ISE_RANGES = 21; + + enum + { + cBLOCK_SIZE_4x4 = 0, // 16 samples + cBLOCK_SIZE_5x4 = 1, // 20 samples + cBLOCK_SIZE_5x5 = 2, // 25 samples + cBLOCK_SIZE_6x5 = 3, // 30 samples + + cBLOCK_SIZE_6x6 = 4, // 36 samples + cBLOCK_SIZE_8x5 = 5, // 40 samples + cBLOCK_SIZE_8x6 = 6, // 48 samples + cBLOCK_SIZE_10x5 = 7, // 50 samples + + cBLOCK_SIZE_10x6 = 8, // 60 samples + cBLOCK_SIZE_8x8 = 9, // 64 samples + cBLOCK_SIZE_10x8 = 10, // 80 samples + cBLOCK_SIZE_10x10 = 11, // 100 samples + + cBLOCK_SIZE_12x10 = 12, // 120 samples + cBLOCK_SIZE_12x12 = 13, // 
144 samples + + cTOTAL_BLOCK_SIZES = 14 + }; + + // Valid endpoint ISE ranges + const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4 + const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20 + const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1; + + // Valid weight ISE ranges + const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0 + const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11 + const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1; + + // The ISE range table. + extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1) + + // Possible Color Component Select values, used in dual plane mode. + // The CCS component will be interpolated using the 2nd weight plane. + enum ccs + { + CCS_GBA_R = 0, + CCS_RBA_G = 1, + CCS_RGA_B = 2, + CCS_RGB_A = 3 + }; + + struct astc_block + { + uint32_t m_vals[4]; + }; + + const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode + const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode + const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits) + const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block + + struct log_astc_block + { + bool m_error_flag; + + bool m_solid_color_flag_ldr, m_solid_color_flag_hdr; + + uint8_t m_user_mode; // user defined value, not used in this module + + // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr + uint8_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block + + bool m_dual_plane; + + uint8_t m_weight_ise_range; // 0-11 + uint8_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking + + uint8_t 
m_color_component_selector; // 0-3, controls which channel uses the 2nd (odd) weights, only used in dual plane mode + + uint8_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode) + uint16_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1 + + uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's + + union + { + // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc. + uint8_t m_weights[MAX_GRID_WEIGHTS]; + uint16_t m_solid_color[4]; + }; + + // ISE endpoint values + // Endpoint order examples: + // 1 subset LA : LL0 LH0 AL0 AH0 + // 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 + // 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 + // 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1 + // 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1 + // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1 + uint8_t m_endpoints[MAX_ENDPOINTS]; + + void clear() + { + memset(this, 0, sizeof(*this)); + } + }; + + // Open interval + inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert((num_bits >= 1) && (num_bits <= 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + + // Returns the number of levels in the given ISE range. + inline uint32_t get_ise_levels(uint32_t ise_range) + { + assert(ise_range < TOTAL_ISE_RANGES); + return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0]; + } + + inline int get_ise_sequence_bits(int count, int range) + { + // See 18.22 Data Size Determination - note this will be <= the # of bits actually written by encode_bise(). (It's magic.) 
+ int total_bits = g_ise_range_table[range][0] * count; + total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5; + total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3; + return total_bits; + } + + inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w) + { + assert(w <= MAX_WEIGHT_INTERPOLANT_VALUE); + return (l * (64 - w) + h * w + 32) >> 6; + } + + void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats = nullptr); + + struct pack_stats + { + uint32_t m_header_bits; + uint32_t m_endpoint_bits; + uint32_t m_weight_bits; + + inline pack_stats() { clear(); } + inline void clear() { memset(this, 0, sizeof(*this)); } + }; + + enum + { + cValidateEarlyOutAtEndpointISEChecks = 1, + cValidateSkipFinalEndpointWeightPacking = 2, + }; + + // Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions. + bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr, uint32_t validate_flags = 0); + + // Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component. + void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr); + + // Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's) + void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr); + + // These helpers are all quite slow, but are useful for table preparation. 
// These helpers are all quite slow, but are useful for table preparation.

// Dequantizes an ISE-encoded endpoint symbol to a [0,255] value.
// NOTE(review): valid endpoint ISE ranges are [4,20] (FIRST/LAST_VALID_ENDPOINT_ISE_RANGE);
// the implementation asserts this range.
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-20

// Dequantizes an ISE-encoded weight symbol to a [0,64] interpolant value.
// NOTE(review): valid weight ISE ranges are [0,11] (FIRST/LAST_VALID_WEIGHT_ISE_RANGE);
// the implementation handles range 11 (0-31 levels) as its largest case.
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-11

// Return the ISE symbol whose dequantized value is nearest to v (endpoint/weight domain respectively).
uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);

// Builds the forward/inverse quantization tables for one ISE range.
void create_quant_tables(
    uint8_t* pVal_to_ise,   // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
    uint8_t* pISE_to_val,   // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
    uint8_t* pISE_to_rank,  // returns the level rank index given an ISE symbol, [levels]
    uint8_t* pRank_to_ISE,  // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
    uint32_t ise_range,     // ise range, [4,20] for endpoints, [0,11] for weights
    bool weight_flag);      // false if block endpoints, true if weights

// True if the CEM (color endpoint mode) is LDR.
bool is_cem_ldr(uint32_t mode);
inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }

// True if the CEM encodes an alpha channel.
bool does_cem_have_alpha(uint32_t mode);

// True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
bool is_valid_block_size(uint32_t w, uint32_t h);

// w/h must be a valid ASTC block size, or it returns cBLOCK_SIZE_4x4
uint32_t get_block_size_index(uint32_t w, uint32_t h);

// Bits per texel for the given block dimensions.
float get_bitrate_from_block_size(uint32_t w, uint32_t h);

// Table-driven texel partition/subset lookup for multi-partition blocks.
uint32_t get_texel_partition_from_table(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t subsets, uint32_t x, uint32_t y);

// Scan all subsets' CEMs for HDR (resp. LDR) modes.
bool block_has_any_hdr_cems(const log_astc_block& log_blk);
bool block_has_any_ldr_cems(const log_astc_block& log_blk);

// Returns the # of endpoint values for the given CEM.
// Returns the # of ISE endpoint values used by the given CEM (2/4/6/8, i.e. 2 + 2*(cem>>2)).
inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); }

// Forward and inverse quantization tables for one ISE range (endpoints or weights).
// NOTE(review): element type reconstructed as uint8_t — matches the uint8_t* signature
// of create_quant_tables(); confirm against upstream astc_helpers.h.
struct dequant_table
{
    basisu::vector<uint8_t> m_val_to_ise;  // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
    basisu::vector<uint8_t> m_ISE_to_val;  // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
    basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]
    basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]

    // Sizes the tables; weight tables map [0,64], endpoint tables map [0,255].
    void init(bool weight_flag, uint32_t num_levels)
    {
        m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_INTERPOLANT_VALUE + 1) : 256);
        m_ISE_to_val.resize(num_levels);
        m_ISE_to_rank.resize(num_levels);
        m_rank_to_ISE.resize(num_levels);
    }

    // Rank (sorted level index) -> dequantized value, via the ISE symbol.
    uint32_t get_rank_to_val(uint32_t rank) const
    {
        const uint32_t ise = m_rank_to_ISE[rank];
        const uint32_t val = m_ISE_to_val[ise];
        return val;
    }

    // Value -> rank, via the nearest ISE symbol.
    uint32_t get_val_to_rank(uint32_t val)
    {
        const uint32_t ise = m_val_to_ise[val];
        const uint32_t rank = m_ISE_to_rank[ise];
        return rank;
    }
};

// All dequant tables for every valid weight ([0,11]) and endpoint ([4,20]) ISE range.
struct dequant_tables
{
    dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];
    dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];
    bool m_initialized_flag = false;

    const dequant_table& get_weight_tab(uint32_t range) const
    {
        assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
        return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
    }

    dequant_table& get_weight_tab(uint32_t range)
    {
        assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
        return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
    }

    const dequant_table& get_endpoint_tab(uint32_t range) const
    {
        assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
        return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
    }

    dequant_table& get_endpoint_tab(uint32_t range)
    {
        assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
        return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
    }

    // Idempotent one-time build of every weight and endpoint table.
    void init()
    {
        if (m_initialized_flag)
            return;

        for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
        {
            const uint32_t num_levels = get_ise_levels(range);
            dequant_table& tab = get_weight_tab(range);

            tab.init(true, num_levels);

            create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), tab.m_ISE_to_rank.data(), tab.m_rank_to_ISE.data(), range, true);
        }

        for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
        {
            const uint32_t num_levels = get_ise_levels(range);
            dequant_table& tab = get_endpoint_tab(range);

            tab.init(false, num_levels);

            create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), tab.m_ISE_to_rank.data(), tab.m_rank_to_ISE.data(), range, false);
        }

        m_initialized_flag = true;
    }
};

// Global singleton tables; built by init_tables().
extern dequant_tables g_dequant_tables;
void init_tables();

// One bilinear tap used when upsampling the (possibly smaller) weight grid to block resolution.
struct weighted_sample
{
    uint8_t m_src_x;
    uint8_t m_src_y;
    uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
};

void compute_upsample_weights(
    int block_width, int block_height,
    int weight_grid_width, int weight_grid_height,
    weighted_sample* pWeights); // there will be block_width * block_height bilinear samples

// Bilinearly upsample a single weight plane from the weight grid to block resolution.
void upsample_weight_grid(
    uint32_t bx, uint32_t by,       // destination/to dimension
    uint32_t wx, uint32_t wy,       // source/from dimension
    const uint8_t* pSrc_weights,    // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
    uint8_t* pDst_weights);         // [by][bx]

// Same as above but upsamples both dual-plane weight grids in one pass (XUASTC LDR path).
void upsample_weight_grid_xuastc_ldr(
    uint32_t bx, uint32_t by,       // destination/to dimension
    uint32_t wx, uint32_t wy,       // source/from dimension
    const uint8_t* pSrc_weights0,   // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
    uint8_t* pDst_weights0,         // [by][bx]
    const uint8_t* pSrc_weights1,   // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
    uint8_t* pDst_weights1);        // [by][bx]

bool is_small_block(uint32_t block_width, uint32_t block_height);

// Procedurally returns the texel partition/subset index given the block coordinate and config (very slow).
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);

// Returns the texel partition/subset index given the block coordinate and config - table lookup, but currently ONLY 2-3 SUBSETS to save RAM.
int get_precomputed_texel_partition(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions);

// ASTC "blue contraction" endpoint transform (see the Khronos Data Format spec).
void blue_contract(
    int r, int g, int b, int a,
    int& dr, int& dg, int& db, int& da);

void bit_transfer_signed(int& a, int& b);

// Decodes one CEM's raw ISE endpoint values into low/high per-channel endpoints.
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);

// IEEE 754 binary16 stored in a uint16_t.
typedef uint16_t half_float;
half_float float_to_half(float val, bool toward_zero);
float half_to_float(half_float hval);

// Notes:
// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
// However, this is not lossless in the general sense.
// Converts an ASTC HDR quantized-log16 value (16-bit: 5-bit exponent, 11-bit mantissa)
// to a half float, per the piecewise-linear mantissa mapping in the ASTC spec.
inline half_float qlog16_to_half(int k)
{
    assert((k >= 0) && (k <= 0xFFFF));

    int E = (k & 0xF800) >> 11; // 5-bit exponent field
    int M = k & 0x7FF;          // 11-bit mantissa field

    // Piecewise mantissa expansion (three linear segments).
    int Mt;
    if (M < 512)
        Mt = 3 * M;
    else if (M >= 1536)
        Mt = 5 * M - 2048;
    else
        Mt = 4 * M - 512;

    return (half_float)((E << 10) + (Mt >> 3));
}

// Largest value representable by shared-exponent RGB9E5 when converted from half floats.
const int MAX_RGB9E5 = 0xff80;
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
uint32_t pack_rgb9e5(float r, float g, float b);

// Output format selector for decode_block()/decode_block_xuastc_ldr().
enum decode_mode
{
    cDecodeModeSRGB8 = 0,  // returns uint8_t's, not valid on HDR blocks
    cDecodeModeLDR8 = 1,   // returns uint8_t's, not valid on HDR blocks
    cDecodeModeHDR16 = 2,  // returns uint16_t's (half floats), valid on all LDR/HDR blocks
    cDecodeModeRGB9E5 = 3  // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
};

// Decodes logical block to output pixels.
// pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);

// Assuming the ASTC logical block is valid, this checks for the extra XUASTC LDR constraints.
bool is_block_xuastc_ldr(const log_astc_block& log_blk);

// XUASTC LDR only - primary assumption is the logical block comes directly from our supercompressor. DO NOT call on general ASTC blocks.
bool decode_block_xuastc_ldr(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode, const uint8_t* pUpsampled_weights_to_use = nullptr, uint32_t start_x = 0, uint32_t start_y = 0, uint32_t end_x = 0, uint32_t end_y = 0);

// Decodes a BISE-packed sequence starting at bit_ofs within a 128-bit block into ISE symbols.
void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);

// Unpack a physical ASTC encoded GPU texture block to a logical block description.
+ bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height); + + uint8_t& get_weight(log_astc_block& log_block, uint32_t plane_index, uint32_t idx); + uint8_t get_weight(const log_astc_block& log_block, uint32_t plane_index, uint32_t idx); + void extract_weights(const log_astc_block& log_block, uint8_t* pWeights, uint32_t plane_index); + void set_weights(log_astc_block& log_block, const uint8_t* pWeights, uint32_t plane_index); + uint32_t get_total_weights(const log_astc_block& log_block); + + uint8_t* get_endpoints(log_astc_block& log_block, uint32_t partition_index); + const uint8_t* get_endpoints(const log_astc_block& log_block, uint32_t partition_index); + + const char* get_cem_name(uint32_t cem_index); + bool cem_is_ldr_direct(uint32_t cem_index); + bool cem_is_ldr_base_scale(uint32_t cem_index); + bool cem_is_ldr_base_plus_ofs(uint32_t cem_index); + + bool cem_supports_bc(uint32_t cem); + + void bit_transfer_signed_dec(int& a, int& b); + void bit_transfer_signed_enc(int& a, int& b); + + bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + + uint32_t get_base_cem_without_alpha(uint32_t cem); + + int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta); + + // index range: [0,NUM_ASTC_BLOCK_SIZES-1] + void get_astc_block_size_by_index(uint32_t index, uint32_t& width, uint32_t& height); + + // -1 if invalid + int find_astc_block_size_index(uint32_t width, uint32_t height); + + // 8-bit linear8 or sRGB8, le/he are [0,255], w is [0,64] + inline int channel_interpolate(int le, int he, int w, bool astc_srgb_decode) + { + assert((w >= 0) && (w <= 64)); + assert((le >= 0) && (le <= 255)); + 
assert((he >= 0) && (he <= 255)); + + if (astc_srgb_decode) + { + le = (le << 8) | 0x80; + he = (he << 8) | 0x80; + } + else + { + le = (le << 8) | le; + he = (he << 8) | he; + } + + return astc_helpers::weight_interpolate(le, he, w) >> 8; + } + +} // namespace astc_helpers + +#endif // BASISU_ASTC_HELPERS_HEADER + +//------------------------------------------------------------------ + +#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION + +namespace astc_helpers +{ + template inline T my_min(T a, T b) { return (a < b) ? a : b; } + template inline T my_max(T a, T b) { return (a > b) ? a : b; } + + const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = { + { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 }, + { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 }, + { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 }, + { 12, 10 }, { 12, 12 } + }; + + const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] = + { + //b t q + //2 3 5 // rng ise_index notes + { 1, 0, 0 }, // 0..1 0 + { 0, 1, 0 }, // 0..2 1 + { 2, 0, 0 }, // 0..3 2 + { 0, 0, 1 }, // 0..4 3 + { 1, 1, 0 }, // 0..5 4 min endpoint ISE index + { 3, 0, 0 }, // 0..7 5 + { 1, 0, 1 }, // 0..9 6 + { 2, 1, 0 }, // 0..11 7 + { 4, 0, 0 }, // 0..15 8 + { 2, 0, 1 }, // 0..19 9 + { 3, 1, 0 }, // 0..23 10 + { 5, 0, 0 }, // 0..31 11 max weight ISE index + { 3, 0, 1 }, // 0..39 12 + { 4, 1, 0 }, // 0..47 13 + { 6, 0, 0 }, // 0..63 14 + { 4, 0, 1 }, // 0..79 15 + { 5, 1, 0 }, // 0..95 16 + { 7, 0, 0 }, // 0..127 17 + { 5, 0, 1 }, // 0..159 18 + { 6, 1, 0 }, // 0..191 19 + { 8, 0, 0 }, // 0..255 20 + }; + + static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize) + { + uint8_t* pBuf = reinterpret_cast(pDst); + + assert(codesize <= 9); + if (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t val = code << byte_bit_offset; + + uint32_t index = bit_offset >> 3; + pBuf[index] |= (uint8_t)val; + + if (codesize > (8 - byte_bit_offset)) + pBuf[index + 1] |= (uint8_t)(val >> 8); + + bit_offset += 
codesize; + } + } + + static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) + { + return (bits >> low) & ((1 << (high - low + 1)) - 1); + } + + // Writes bits to output in an endian safe way + static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits) + { + assert(total_bits <= 31); + assert(value < (1u << total_bits)); + + uint8_t* pBytes = reinterpret_cast(pOutput); + + while (total_bits) + { + const uint32_t bits_to_write = my_min(total_bits, 8 - (bit_pos & 7)); + + pBytes[bit_pos >> 3] |= static_cast(value << (bit_pos & 7)); + + bit_pos += bits_to_write; + total_bits -= bits_to_write; + value >>= bits_to_write; + } + } + + static const uint8_t g_astc_quint_encode[125] = + { + 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, + 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104, + 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, + 126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/ + }; + + // Encodes 3 values to output, usable for any range that uses quints and bits + static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t* pStats) + { + // First extract the quints and the bits from the 3 input values + int quints = 0, bits[3]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 3; i++) + { + static const int s_muls[3] = { 1, 5, 25 }; + + const int t = pValues[i] >> n; + + quints += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits. 
+ // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(quints < 125); + const int T = g_astc_quint_encode[quints]; + + // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) | + (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); + + if (pStats) + *pStats += n * 3 + 7; + } + + static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, + 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, + 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, + 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224, + 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, + 191, 223, 124, 125, 126 }; + + // Encodes 5 values to output, usable for any range that uses trits and bits + static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t *pStats) + { + // First extract the trits and the bits from the 5 
input values + int trits = 0, bits[5]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 5; i++) + { + static const int s_muls[5] = { 1, 3, 9, 27, 81 }; + + const int t = pValues[i] >> n; + + trits += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(trits < 243); + const int T = g_astc_trit_encode[trits]; + + // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); + + astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | + (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); + + if (pStats) + *pStats += n * 5 + 8; + } + + // Packs values using ASTC's BISE to output buffer. + void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats) + { + uint32_t temp[5] = { 0 }; + + const int num_bits = g_ise_range_table[range][0]; + + int group_size = 0; + if (g_ise_range_table[range][1]) + group_size = 5; + else if (g_ise_range_table[range][2]) + group_size = 3; + +#ifndef NDEBUG + const uint32_t num_levels = get_ise_levels(range); + for (int i = 0; i < num_vals; i++) + { + assert(pSrc_vals[i] < num_levels); + } +#endif + + if (group_size) + { + // Range has trits or quints - pack each group of 5 or 3 values + const int total_groups = (group_size == 5) ? 
((num_vals + 4) / 5) : ((num_vals + 2) / 3); + + for (int group_index = 0; group_index < total_groups; group_index++) + { + uint8_t vals[5] = { 0 }; + + const int limit = my_min(group_size, num_vals - group_index * group_size); + for (int i = 0; i < limit; i++) + vals[i] = pSrc_vals[group_index * group_size + i]; + + // Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed. + // get_ise_sequence_bits() returns the # of bits that must be written for proper decoding. + if (group_size == 5) + astc_encode_trits(temp, vals, bit_pos, num_bits, pStats); + else + astc_encode_quints(temp, vals, bit_pos, num_bits, pStats); + } + } + else + { + for (int i = 0; i < num_vals; i++) + astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits); + + if (pStats) + *pStats += num_vals * num_bits; + } + + pDst[0] |= temp[0]; pDst[1] |= temp[1]; + pDst[2] |= temp[2]; pDst[3] |= temp[3]; + } + + inline uint32_t rev_dword(uint32_t bits) + { + uint32_t v = (bits << 16) | (bits >> 16); + v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8); v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4); + v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2); v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1); + return v; + } + + static inline bool is_packable(int value, int num_bits) { assert((num_bits >= 1) && (num_bits < 31)); return (value >= 0) && (value < (1 << num_bits)); } + + static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits) + { + config_bits = 0; + + const int W = log_block.m_grid_width, H = log_block.m_grid_height; + + const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision + const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits + + // See Tables 81-82 + // Compute p from weight range + uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 
6 : 0); + + // Rearrange p's bits to p0 p2 p1 + p = (p >> 1) + ((p & 1) << 2); + + // Try encoding each row of table 82. + + // W+4 H+2 + if (is_packable(W - 4, 2) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3); + return true; + } + + // W+8 H+2 + if (is_packable(W - 8, 2) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3); + return true; + } + + // W+2 H+8 + if (is_packable(W - 2, 2) && is_packable(H - 8, 2)) + { + config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3); + return true; + } + + // W+2 H+6 + if (is_packable(W - 2, 2) && is_packable(H - 6, 1)) + { + config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); + return true; + } + + // W+2 H+2 + if (is_packable(W - 2, 1) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); + return true; + } + + // 12 H+2 + if ((W == 12) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2); + return true; + } + + // W+2 12 + if ((H == 12) && is_packable(W - 2, 2)) + { + config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2); + return true; + } + + // 6 10 + if ((W == 6) && (H == 10)) + { + config_bits = (Dp_P << 9) | (3 << 7) | (p << 2); + return true; + } + + // 10 6 + if ((W == 10) && (H == 6)) + { + config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2); + return true; + } + + // W+6 H+6 (no dual plane or high prec) + if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2)) + { + config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2); + return true; + } + + // Failed: unsupported weight grid dimensions or config. 
+ return false; + } + + bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats, uint32_t validate_flags) + { + // Basic sanity checking + if (!log_block.m_dual_plane) + { + assert(log_block.m_color_component_selector == 0); + } + else + { + assert(log_block.m_color_component_selector <= 3); + } + + memset(&phys_block, 0, sizeof(phys_block)); + + if (pExpected_endpoint_range) + *pExpected_endpoint_range = -1; + + assert(!log_block.m_error_flag); + if (log_block.m_error_flag) + return false; + + if (log_block.m_solid_color_flag_ldr) + { + pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats); + return true; + } + else if (log_block.m_solid_color_flag_hdr) + { + pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats); + return true; + } + + if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS)) + return false; + + // Max usable weight range is 11 + if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE) + return false; + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) + return false; + + if (log_block.m_color_component_selector > 3) + return false; + + // TODO: sanity check grid width/height vs. block's physical width/height + + uint32_t config_bits = 0; + if (!get_config_bits(log_block, config_bits)) + return false; + + uint32_t bit_pos = 0; + astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11); + if (pStats) + pStats->m_header_bits += 11; + + const uint32_t total_grid_weights = (log_block.m_dual_plane ? 
2 : 1) * (log_block.m_grid_width * log_block.m_grid_height); + const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range); + + // 18.24 Illegal Encodings + if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) + return false; + + uint32_t total_extra_bits = 0; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2); + if (pStats) + pStats->m_header_bits += 2; + + if (log_block.m_num_partitions > 1) + { + if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS) + return false; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10); + if (pStats) + pStats->m_header_bits += 10; + + uint32_t highest_cem = 0, lowest_cem = UINT32_MAX; + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + { + highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]); + lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]); + } + + if (highest_cem > 15) + return false; + + // Ensure CEM range is contiguous + if (((highest_cem >> 2) > (1 + (lowest_cem >> 2)))) + return false; + + // See tables 79/80 + uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2; + if (lowest_cem != highest_cem) + { + encoded_cem = my_min(3, 1 + (lowest_cem >> 2)); + + // See tables at 23.11 Color Endpoint Mode + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + { + const int M = log_block.m_color_endpoint_modes[j] & 3; + + const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1); + if ((C & 1) != C) + return false; + + encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j)); + } + + total_extra_bits = 3 * log_block.m_num_partitions - 4; + + if ((total_weight_bits + total_extra_bits) > 128) + return false; + + uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits; + astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 
6, total_extra_bits); + if (pStats) + pStats->m_header_bits += total_extra_bits; + } + + astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6); + if (pStats) + pStats->m_header_bits += 6; + } + else + { + if (log_block.m_partition_id) + return false; + if (log_block.m_color_endpoint_modes[0] > 15) + return false; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4); + if (pStats) + pStats->m_header_bits += 4; + } + + if (log_block.m_dual_plane) + { + if (log_block.m_num_partitions > 3) + return false; + + total_extra_bits += 2; + + uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits; + astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2); + if (pStats) + pStats->m_header_bits += 2; + } + + const uint32_t total_config_bits = bit_pos + total_extra_bits; + const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; + if (num_remaining_bits < 0) + return false; + + uint32_t total_cem_vals = 0; + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2); + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + if (validate_flags & cValidateEarlyOutAtEndpointISEChecks) + return true; + + int endpoint_ise_range = -1; + for (int k = 20; k > 0; k--) + { + int bits = get_ise_sequence_bits(total_cem_vals, k); + if (bits <= num_remaining_bits) + { + endpoint_ise_range = k; + break; + } + } + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE) + return false; + + // Ensure the caller utilized the right endpoint ISE range. 
+ if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range) + { + if (pExpected_endpoint_range) + *pExpected_endpoint_range = endpoint_ise_range; + return false; + } + + if (pStats) + { + pStats->m_endpoint_bits += get_ise_sequence_bits(total_cem_vals, endpoint_ise_range); + pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range); + } + + if (validate_flags & cValidateSkipFinalEndpointWeightPacking) + return true; + + // Pack endpoints forwards + encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range); + + // Pack weights backwards + uint32_t weight_data[4] = { 0 }; + encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range); + + for (uint32_t i = 0; i < 4; i++) + phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]); + + return true; + } + + static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits) + { + assert(num_src_bits <= num_dst_bits); + assert((src & ((1 << num_src_bits) - 1)) == src); + + uint32_t dst = 0; + for (int shift = num_dst_bits - num_src_bits; shift > -num_src_bits; shift -= num_src_bits) + dst |= (shift >= 0) ? 
(src << shift) : (src >> -shift); + + return dst; + } + + uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range) + { + assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(val < get_ise_levels(ise_range)); + + uint32_t u = 0; + + switch (ise_range) + { + case 5: + { + u = bit_replication_scale(val, 3, 8); + break; + } + case 8: + { + u = bit_replication_scale(val, 4, 8); + break; + } + case 11: + { + u = bit_replication_scale(val, 5, 8); + break; + } + case 14: + { + u = bit_replication_scale(val, 6, 8); + break; + } + case 17: + { + u = bit_replication_scale(val, 7, 8); + break; + } + case 20: + { + u = val; + break; + } + case 4: + case 6: + case 7: + case 9: + case 10: + case 12: + case 13: + case 15: + case 16: + case 18: + case 19: + { + const uint32_t num_bits = g_ise_range_table[ise_range][0]; + const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); + const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); + + // compute Table 103 row index + const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2; + + assert(range_index >= 0 && range_index <= 10); + + uint32_t bits = val & ((1 << num_bits) - 1); + uint32_t tval = val >> num_bits; + + assert(tval < (num_trits ? 3U : 5U)); + + uint32_t a = bits & 1; + uint32_t b = (bits >> 1) & 1; + uint32_t c = (bits >> 2) & 1; + uint32_t d = (bits >> 3) & 1; + uint32_t e = (bits >> 4) & 1; + uint32_t f = (bits >> 5) & 1; + + uint32_t A = a ? 
511 : 0; + uint32_t B = 0; + + switch (range_index) + { + case 2: + { + // 876543210 + // b000b0bb0 + B = (b << 1) | (b << 2) | (b << 4) | (b << 8); + break; + } + case 3: + { + // 876543210 + // b0000bb00 + B = (b << 2) | (b << 3) | (b << 8); + break; + } + case 4: + { + // 876543210 + // cb000cbcb + B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8); + break; + } + case 5: + { + // 876543210 + // cb0000cbc + B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8); + break; + } + case 6: + { + // 876543210 + // dcb000dcb + B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8); + break; + } + case 7: + { + // 876543210 + // dcb0000dc + B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8); + break; + } + case 8: + { + // 876543210 + // edcb000ed + B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8); + break; + } + case 9: + { + // 876543210 + // edcb0000e + B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8); + break; + } + case 10: + { + // 876543210 + // fedcb000f + B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8); + break; + } + default: + break; + } + + static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 }; + uint32_t C = C_vals[range_index]; + uint32_t D = tval; + + u = D * C + B; + u = u ^ A; + u = (A & 0x80) | (u >> 2); + + break; + } + default: + { + assert(0); + break; + } + } + + return u; + } + + uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range) + { + assert(val < get_ise_levels(ise_range)); + + uint32_t u = 0; + switch (ise_range) + { + case 0: + { + u = val ? 
63 : 0; + break; + } + case 1: // 0-2 + { + const uint8_t s_tab_0_2[3] = { 0, 32, 63 }; + u = s_tab_0_2[val]; + break; + } + case 2: // 0-3 + { + u = bit_replication_scale(val, 2, 6); + break; + } + case 3: // 0-4 + { + const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 }; + u = s_tab_0_4[val]; + break; + } + case 5: // 0-7 + { + u = bit_replication_scale(val, 3, 6); + break; + } + case 8: // 0-15 + { + u = bit_replication_scale(val, 4, 6); + break; + } + case 11: // 0-31 + { + u = bit_replication_scale(val, 5, 6); + break; + } + case 4: // 0-5 + case 6: // 0-9 + case 7: // 0-11 + case 9: // 0-19 + case 10: // 0-23 + { + const uint32_t num_bits = g_ise_range_table[ise_range][0]; + const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); + const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); + + // compute Table 103 row index + const int range_index = num_bits * 2 + (num_quints ? 1 : 0); + + // Extract bits and tris/quints from value + const uint32_t bits = val & ((1u << num_bits) - 1); + const uint32_t D = val >> num_bits; + + assert(D < (num_trits ? 3U : 5U)); + + // Now dequantize + // See Table 103. ASTC weight unquantization parameters + static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 }; + + const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1; + + const uint32_t A = (a == 0) ? 0 : 0x7F; + + uint32_t B = 0; + if (range_index == 4) + B = ((b << 6) | (b << 2) | (b << 0)); + else if (range_index == 5) + B = ((b << 6) | (b << 1)); + else if (range_index == 6) + B = ((c << 6) | (b << 5) | (c << 1) | (b << 0)); + + const uint32_t C = C_table[range_index - 2]; + + u = D * C + B; + u = u ^ A; + u = (A & 0x20) | (u >> 2); + break; + } + default: + assert(0); + break; + } + + if (u > 32) + u++; + + return u; + } + + // Returns the nearest ISE symbol given a [0,255] endpoint value. 
+ uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range) // brute-force: scans every dequantized level, returns ISE symbol with min abs error
+ {
+ assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
+
+ const uint32_t total_levels = get_ise_levels(ise_range);
+ int best_e = INT_MAX, best_index = 0; // best error/symbol so far
+ for (uint32_t i = 0; i < total_levels; i++)
+ {
+ const int qv = dequant_bise_endpoint(i, ise_range);
+ int e = (int)labs(v - qv);
+ if (e < best_e)
+ {
+ best_e = e;
+ best_index = i;
+ if (!best_e) // exact match, stop early
+ break;
+ }
+ }
+ return best_index;
+ }
+
+ // Returns the nearest ISE weight given a [0,64] endpoint value.
+ uint32_t find_nearest_bise_weight(int v, uint32_t ise_range) // same linear-scan strategy as find_nearest_bise_endpoint, over weight levels
+ {
+ assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
+ assert(v <= (int)MAX_WEIGHT_INTERPOLANT_VALUE);
+
+ const uint32_t total_levels = get_ise_levels(ise_range);
+ int best_e = INT_MAX, best_index = 0; // best error/symbol so far
+ for (uint32_t i = 0; i < total_levels; i++)
+ {
+ const int qv = dequant_bise_weight(i, ise_range);
+ int e = (int)labs(v - qv);
+ if (e < best_e)
+ {
+ best_e = e;
+ best_index = i;
+ if (!best_e)
+ break;
+ }
+ }
+ return best_index;
+ }
+
+ void create_quant_tables(
+ uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
+ uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
+ uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
+ uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
+ uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
+ bool weight_flag) // false if block endpoints, true if weights
+ {
+ const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_INTERPOLANT_VALUE + 1) : 256;
+
+ for (uint32_t i = 0; i < num_dequant_vals; i++)
+ {
+ uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);
+
+ if (pVal_to_ise)
+ pVal_to_ise[i] = (uint8_t)bise_index;
+
+ if (pISE_to_val)
+ pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);
+ }
+
+ if (pISE_to_rank || pRank_to_ISE)
+ {
+ const uint32_t num_levels = get_ise_levels(ise_range);
+
+ if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2]) // no trits and no quints
+ {
+ // Only bits
+ for (uint32_t i = 0; i < num_levels; i++) // pure-bit ranges dequantize monotonically, so rank == ISE symbol
+ {
+ if (pISE_to_rank)
+ pISE_to_rank[i] = (uint8_t)i;
+
+ if (pRank_to_ISE)
+ pRank_to_ISE[i] = (uint8_t)i;
+ }
+ }
+ else
+ {
+ // Range has trits or quints
+ uint32_t vals[256];
+ for (uint32_t i = 0; i < num_levels; i++)
+ {
+ uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);
+
+ // Low=ISE value
+ // High=dequantized value
+ vals[i] = (v << 16) | i;
+ }
+
+ // Sorts by dequantized value
+ std::sort(vals, vals + num_levels);
+
+ for (uint32_t rank = 0; rank < num_levels; rank++)
+ {
+ uint32_t ise_val = (uint8_t)vals[rank]; // low byte of the packed pair = ISE symbol
+
+ if (pISE_to_rank)
+ pISE_to_rank[ise_val] = (uint8_t)rank;
+
+ if (pRank_to_ISE)
+ pRank_to_ISE[rank] = (uint8_t)ise_val;
+ }
+ }
+ }
+ }
+
+ void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats) // writes a solid-color (void-extent) LDR block; rh..ah stored little-endian in bytes 8..15
+ {
+ uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
+ memset(pDst, 0xFF, 16);
+
+ pDst[0] = 0b11111100; // void-extent header bytes -- presumably per ASTC spec void-extent encoding; verify against spec
+ pDst[1] = 0b11111101;
+
+ pDst[8] = (uint8_t)rh;
+ pDst[9] = (uint8_t)(rh >> 8);
+ pDst[10] = (uint8_t)gh;
+ pDst[11] = (uint8_t)(gh >> 8);
+ pDst[12] = (uint8_t)bh;
+ pDst[13] = (uint8_t)(bh >> 8);
+ pDst[14] = (uint8_t)ah;
+ pDst[15] = (uint8_t)(ah >> 8);
+
+ if (pStats)
+ pStats->m_header_bits += 16 + 64;
+ }
+
+ // rh-ah are half-floats
+ void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats) // HDR variant: only byte 0 differs from the LDR header
+ {
+ uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
+ memset(pDst, 0xFF, 16);
+
+ pDst[0] = 0b11111100;
+
+ pDst[8] = (uint8_t)rh;
+ pDst[9] = (uint8_t)(rh >> 8);
+ pDst[10] = (uint8_t)gh;
+ pDst[11] = (uint8_t)(gh >> 8);
+ pDst[12] = (uint8_t)bh;
+ pDst[13] = (uint8_t)(bh >> 8);
+ pDst[14] = (uint8_t)ah;
+ pDst[15] = (uint8_t)(ah >> 8);
+
+ if (pStats)
+ pStats->m_header_bits += 8 + 64;
+ }
+
+ bool is_cem_ldr(uint32_t mode) // true for the 10 LDR color endpoint modes listed below
+ {
+ switch (mode)
+ {
+ case CEM_LDR_LUM_DIRECT:
+ case CEM_LDR_LUM_BASE_PLUS_OFS:
+ case CEM_LDR_LUM_ALPHA_DIRECT:
+ case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
+ case CEM_LDR_RGB_BASE_SCALE:
+ case CEM_LDR_RGB_DIRECT:
+ case CEM_LDR_RGB_BASE_PLUS_OFFSET:
+ case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
+ case CEM_LDR_RGBA_DIRECT:
+ case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ bool does_cem_have_alpha(uint32_t mode) // true for CEMs that carry an explicit alpha pair (LDR and HDR)
+ {
+ switch (mode)
+ {
+ case CEM_LDR_LUM_ALPHA_DIRECT:
+ case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
+ case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
+ case CEM_LDR_RGBA_DIRECT:
+ case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
+ case CEM_HDR_RGB_LDR_ALPHA:
+ case CEM_HDR_RGB_HDR_ALPHA:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ bool is_valid_block_size(uint32_t w, uint32_t h) // membership test against the 14 supported 2D block footprints
+ {
+#define BU_ASTC_HELPERS_SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true;
+ BU_ASTC_HELPERS_SIZECHK(4, 4); // 0
+ BU_ASTC_HELPERS_SIZECHK(5, 4); // 1
+
+ BU_ASTC_HELPERS_SIZECHK(5, 5); // 2
+
+ BU_ASTC_HELPERS_SIZECHK(6, 5); // 3
+ BU_ASTC_HELPERS_SIZECHK(6, 6); // 4
+
+ BU_ASTC_HELPERS_SIZECHK(8, 5); // 5
+ BU_ASTC_HELPERS_SIZECHK(8, 6); // 6
+ BU_ASTC_HELPERS_SIZECHK(10, 5); // 7
+ BU_ASTC_HELPERS_SIZECHK(10, 6); // 8
+
+ BU_ASTC_HELPERS_SIZECHK(8, 8); // 9
+ BU_ASTC_HELPERS_SIZECHK(10, 8); // 10
+ BU_ASTC_HELPERS_SIZECHK(10, 10); // 11
+
+ BU_ASTC_HELPERS_SIZECHK(12, 10); // 12
+ BU_ASTC_HELPERS_SIZECHK(12, 12); // 13
+#undef BU_ASTC_HELPERS_SIZECHK
+
+ return false;
+ }
+
+ uint32_t get_block_size_index(uint32_t w, uint32_t h) // maps a valid WxH to its cBLOCK_SIZE_* index
+ {
+ assert(is_valid_block_size(w, h));
+
+ const uint32_t t = w * h; // every supported block size has a unique area, so area alone selects the index
+
+ if (t <= 36)
+ {
+ if (t == 36)
+ return cBLOCK_SIZE_6x6;
+ else if (t == 16)
+ return cBLOCK_SIZE_4x4;
+ else if (t == 25)
+ return cBLOCK_SIZE_5x5;
+ else if (t == 20)
+ return cBLOCK_SIZE_5x4;
+ else if (t == 30)
+ return cBLOCK_SIZE_6x5;
+ }
+ else if (t <= 64)
+ {
+ if (t == 64)
+ return cBLOCK_SIZE_8x8;
+ else if (t == 60)
+ return cBLOCK_SIZE_10x6;
+ else if (t == 50)
+ return cBLOCK_SIZE_10x5;
+ else if (t == 48)
+ return cBLOCK_SIZE_8x6;
+ else if (t == 40)
+ return cBLOCK_SIZE_8x5;
+ }
+ else
+ {
+ if (t == 80)
+ return cBLOCK_SIZE_10x8;
+ else if (t == 100)
+ return cBLOCK_SIZE_10x10;
+ else if (t == 120)
+ return cBLOCK_SIZE_12x10;
+ else if (t == 144)
+ return cBLOCK_SIZE_12x12;
+ }
+
+ assert(0);
+ return cBLOCK_SIZE_4x4;
+ }
+
+ // returns the standard ASTC bitrates given a valid block size from the ASTC spec.
+ // 0=invalid block size
+ float get_bitrate_from_block_size(uint32_t w, uint32_t h) // bits per texel = 128.0 / (w*h), rounded to 2 decimals in the table below
+ {
+#define BU_ASTC_HELPERS_BLOCK_BITRATE(x, y, b) if ((w == (x)) && (h == (y))) return (b);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(4, 4, 8.0f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(5, 4, 6.4f);
+
+ BU_ASTC_HELPERS_BLOCK_BITRATE(5, 5, 5.12f);
+
+ BU_ASTC_HELPERS_BLOCK_BITRATE(6, 5, 4.27f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(6, 6, 3.56f);
+
+ BU_ASTC_HELPERS_BLOCK_BITRATE(8, 5, 3.20f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(8, 6, 2.67f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(10, 5, 2.56f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(10, 6, 2.13f);
+
+ BU_ASTC_HELPERS_BLOCK_BITRATE(8, 8, 2.00f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(10, 8, 1.60f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(10, 10, 1.28f);
+
+ BU_ASTC_HELPERS_BLOCK_BITRATE(12, 10, 1.07f);
+ BU_ASTC_HELPERS_BLOCK_BITRATE(12, 12, .89f);
+#undef BU_ASTC_HELPERS_BLOCK_BITRATE
+
+ return 0.0f; // unsupported block size
+ }
+
+ bool block_has_any_hdr_cems(const log_astc_block& log_blk) // true if any partition uses an HDR color endpoint mode
+ {
+ assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
+
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
+ return true;
+
+ return false;
+ }
+
+ bool block_has_any_ldr_cems(const log_astc_block& log_blk) // true if any partition uses a non-HDR (LDR) color endpoint mode
+ {
+ assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
+
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
+ return true;
+
+ return false;
+ }
+
+ dequant_tables g_dequant_tables; // global singleton, initialized by init_tables()
+
+ void precompute_texel_partitions(); // forward decl, defined below
+
+ // TODO: this is called twice when using the encoder, first init_rank_tabs=false then init_rank_tabs=true.
+ void init_tables() // one-time global setup: dequant tables + precomputed texel partition patterns
+ {
+ g_dequant_tables.init();
+
+ precompute_texel_partitions();
+ }
+
+ void compute_upsample_weights(
+ int block_width, int block_height,
+ int weight_grid_width, int weight_grid_height,
+ weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
+ {
+ const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1); // rounded 10-bit fixed-point step across the block
+ const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
+
+ for (int texelY = 0; texelY < block_height; texelY++)
+ {
+ for (int texelX = 0; texelX < block_width; texelX++)
+ {
+ const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6; // grid position, 4 fractional bits
+ const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;
+ const uint32_t jX = gX >> 4; // integer grid cell
+ const uint32_t jY = gY >> 4;
+ const uint32_t fX = gX & 0xf; // fractional part [0,15]
+ const uint32_t fY = gY & 0xf;
+ const uint32_t w11 = (fX * fY + 8) >> 4; // 4 bilinear taps summing to 16
+ const uint32_t w10 = fY - w11;
+ const uint32_t w01 = fX - w11;
+ const uint32_t w00 = 16 - fX - fY + w11;
+
+ weighted_sample& s = pWeights[texelX + texelY * block_width];
+ s.m_src_x = (uint8_t)jX;
+ s.m_src_y = (uint8_t)jY;
+ s.m_weights[0][0] = (uint8_t)w00;
+ s.m_weights[0][1] = (uint8_t)w01;
+ s.m_weights[1][0] = (uint8_t)w10;
+ s.m_weights[1][1] = (uint8_t)w11;
+ }
+ }
+ }
+
+ // Should be dequantized [0,64] weights
+ void upsample_weight_grid(
+ uint32_t bx, uint32_t by, // destination/to dimension
+ uint32_t wx, uint32_t wy, // source/from dimension
+ const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
+ uint8_t* pDst_weights) // [by][bx]
+ {
+ assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
+ assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
+
+ const uint32_t total_src_weights = wx * wy;
+ const uint32_t total_dst_weights = bx * by;
+
+ if (total_src_weights == total_dst_weights) // same grid size: straight copy, no filtering
+ {
+ assert((bx == wx) && (by == wy));
+
+ memcpy(pDst_weights, pSrc_weights, total_src_weights);
+ return;
+ }
+
+ weighted_sample weights[12 * 12];
+ compute_upsample_weights(bx, by, wx, wy, weights);
+
+ const weighted_sample* pS = weights;
+
+ for (uint32_t y = 0; y < by; y++)
+ {
+ for (uint32_t x = 0; x < bx; x++, ++pS)
+ {
+ const uint32_t w00 = pS->m_weights[0][0];
+ const uint32_t w01 = pS->m_weights[0][1];
+ const uint32_t w10 = pS->m_weights[1][0];
+ const uint32_t w11 = pS->m_weights[1][1];
+
+ assert(w00 || w01 || w10 || w11);
+
+ const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;
+
+ uint32_t total = 8; // rounding bias for the final >> 4
+ if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;
+ if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;
+ if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;
+ if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;
+
+ pDst_weights[x + y * bx] = (uint8_t)(total >> 4);
+ }
+ }
+ }
+
+ void upsample_weight_grid_xuastc_ldr(
+ uint32_t bx, uint32_t by, // destination/to dimension
+ uint32_t wx, uint32_t wy, // source/from dimension
+ const uint8_t* pSrc_weights0, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
+ uint8_t* pDst_weights0, // [by][bx]
+ const uint8_t* pSrc_weights1, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
+ uint8_t* pDst_weights1) // [by][bx]
+ {
+ assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
+ assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
+
+ assert((bx != wx) || (by != wy)); // unlike upsample_weight_grid, the same-size copy path is the caller's job
+
+ const uint32_t scaleX = (1024 + bx / 2) / (bx - 1);
+ const uint32_t scaleY = (1024 + by / 2) / (by - 1);
+
+ const uint32_t gYUInc = scaleY * (wy - 1); // incremental form of compute_upsample_weights' per-texel products
+ const uint32_t gXUInc = scaleX * (wx - 1);
+
+ uint32_t gYU = 32;
+ for (uint32_t texel_y = 0; texel_y < by; texel_y++)
+ {
+ const uint32_t gY = gYU >> 6;
+ gYU += gYUInc;
+
+ const uint32_t jY = gY >> 4;
+ const uint32_t fY = gY & 0xf;
+
+ uint32_t gXU = 32;
+ for (uint32_t texel_x = 0; texel_x < bx; texel_x++)
+ {
+ const uint32_t gX = gXU >> 6;
+ gXU += gXUInc;
+
+ const uint32_t jX = gX >> 4;
+ const uint32_t fX = gX & 0xf;
+
+ const uint32_t w11 = (fX * fY + 8) >> 4;
+ const uint32_t w10 = fY - w11;
+ const uint32_t w01 = fX - w11;
+ const uint32_t w00 = 16 - fX - fY + w11;
+
+ assert(w00 || w01 || w10 || w11);
+
+ const uint32_t sx = jX, sy = jY;
+
+ {
+ uint32_t total0 = 8; // plane 0
+
+ if (w00) total0 += pSrc_weights0[sx + sy * wx] * w00;
+ if (w01) total0 += pSrc_weights0[sx + 1 + sy * wx] * w01;
+ if (w10) total0 += pSrc_weights0[sx + (sy + 1) * wx] * w10;
+ if (w11) total0 += pSrc_weights0[sx + 1 + (sy + 1) * wx] * w11;
+
+ pDst_weights0[texel_x + texel_y * bx] = (uint8_t)(total0 >> 4);
+ }
+
+ if (pDst_weights1) // optional second plane
+ {
+ uint32_t total1 = 8;
+
+ if (w00) total1 += pSrc_weights1[sx + sy * wx] * w00;
+ if (w01) total1 += pSrc_weights1[sx + 1 + sy * wx] * w01;
+ if (w10) total1 += pSrc_weights1[sx + (sy + 1) * wx] * w10;
+ if (w11) total1 += pSrc_weights1[sx + 1 + (sy + 1) * wx] * w11;
+
+ pDst_weights1[texel_x + texel_y * bx] = (uint8_t)(total1 >> 4);
+ }
+ } // texel_x
+ } // texel_y
+ }
+
+ inline uint32_t hash52(uint32_t v) // bit-mixing hash used to derive partition seeds
+ {
+ uint32_t p = v;
+ p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;
+ p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;
+ p ^= p << 6; p ^= p >> 17;
+ return p;
+ }
+
+ bool is_small_block(uint32_t block_width, uint32_t block_height)
+ {
+ assert((block_width >= MIN_BLOCK_DIM) && (block_width <= MAX_BLOCK_DIM));
+ assert((block_height >= MIN_BLOCK_DIM) && (block_height <= MAX_BLOCK_DIM));
+
+ const uint32_t num_blk_pixels = block_width * block_height;
+
+ return num_blk_pixels < 31;
+ }
+
+ // small_block = num_blk_pixels < 31
+ int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block) // returns the subset index [0,num_partitions) for one texel
+ {
+ assert(zIn == 0); // 2D blocks only
+
+ const uint32_t x = small_block ? xIn << 1 : xIn; // small blocks sample the pattern at 2x scale
+ const uint32_t y = small_block ? yIn << 1 : yIn;
+ const uint32_t z = small_block ? zIn << 1 : zIn;
+ const uint32_t seed = seedIn + 1024 * (num_partitions - 1);
+ const uint32_t rnum = hash52(seed);
+
+ uint8_t seed1 = (uint8_t)(rnum & 0xf);
+ uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
+ uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
+ uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
+ uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
+ uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
+ uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
+ uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
+ uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
+ uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
+ uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
+ uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
+
+ seed1 = (uint8_t)(seed1 * seed1); // squaring spreads the 4-bit seeds non-linearly
+ seed2 = (uint8_t)(seed2 * seed2);
+ seed3 = (uint8_t)(seed3 * seed3);
+ seed4 = (uint8_t)(seed4 * seed4);
+ seed5 = (uint8_t)(seed5 * seed5);
+ seed6 = (uint8_t)(seed6 * seed6);
+ seed7 = (uint8_t)(seed7 * seed7);
+ seed8 = (uint8_t)(seed8 * seed8);
+ seed9 = (uint8_t)(seed9 * seed9);
+ seed10 = (uint8_t)(seed10 * seed10);
+ seed11 = (uint8_t)(seed11 * seed11);
+ seed12 = (uint8_t)(seed12 * seed12);
+
+ const int shA = (seed & 2) != 0 ? 4 : 5;
+ const int shB = (num_partitions == 3) ? 6 : 5;
+ const int sh1 = (seed & 1) != 0 ? shA : shB;
+ const int sh2 = (seed & 1) != 0 ? shB : shA;
+ const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
+
+ seed1 = (uint8_t)(seed1 >> sh1);
+ seed2 = (uint8_t)(seed2 >> sh2);
+ seed3 = (uint8_t)(seed3 >> sh1);
+ seed4 = (uint8_t)(seed4 >> sh2);
+ seed5 = (uint8_t)(seed5 >> sh1);
+ seed6 = (uint8_t)(seed6 >> sh2);
+ seed7 = (uint8_t)(seed7 >> sh1);
+ seed8 = (uint8_t)(seed8 >> sh2);
+ seed9 = (uint8_t)(seed9 >> sh3);
+ seed10 = (uint8_t)(seed10 >> sh3);
+ seed11 = (uint8_t)(seed11 >> sh3);
+ seed12 = (uint8_t)(seed12 >> sh3);
+
+ const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14)); // per-subset planar scores
+ const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
+ const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
+ const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
+
+ return (a >= b && a >= c && a >= d) ? 0
+ : (b >= c && b >= d) ? 1
+ : (c >= d) ? 2
+ : 3;
+ }
+
+ // Precomputed partition patterns for each 10-bit seed and small/large block sizes for 2-3 subsets.
+ // This costs 144KB of RAM and some init, but considering the sheer complexity of compute_texel_partition() and how hotly it's called in the compressors and transcoders that's worth it.
+ // Byte packing:
+ // low 4 bits=small blocks (on valid up to 6x5)
+ // high 4 bits=large blocks (6x6 or larger)
+
+ static uint8_t g_texel_partitions[NUM_PARTITION_PATTERNS][12][12]; // [seed][y][x]
+
+ void sanity_check_texel_partition_tables() // entire body is currently compiled out via #if 0
+ {
+#if 0
+#if defined(_DEBUG) || defined(DEBUG)
+ // sanity checking
+ for (uint32_t i = 0; i < cTOTAL_BLOCK_SIZES; i++)
+ {
+ const uint32_t bw = g_astc_block_sizes[i][0], bh = g_astc_block_sizes[i][1];
+ const bool is_small_block_flag = is_small_block(bw, bh);
+
+ assert(get_block_size_index(bw, bh) == i);
+
+ for (uint32_t s = 0; s < NUM_PARTITION_PATTERNS; s++)
+ {
+ for (uint32_t y = 0; y < bh; y++)
+ {
+ for (uint32_t x = 0; x < bw; x++)
+ {
+ const uint32_t k2 = compute_texel_partition(s, x, y, 0, 2, is_small_block_flag);
+ const uint32_t k3 = compute_texel_partition(s, x, y, 0, 3, is_small_block_flag);
+
+ assert(get_precomputed_texel_partition(bw, bh, s, x, y, 2) == (int)k2);
+ assert(get_precomputed_texel_partition(bw, bh, s, x, y, 3) == (int)k3);
+ } // x
+ } // y
+ } // s
+ }
+ printf("precompute_texel_partitions: Sanity check OK\n");
+#endif
+#endif
+ }
+
+ void precompute_texel_partition() // fills g_texel_partitions with packed 2- and 3-subset indices for small+large block variants
+ {
+ for (uint32_t seed = 0; seed < NUM_PARTITION_PATTERNS; seed++)
+ {
+ for (uint32_t y = 0; y < MAX_BLOCK_DIM; y++)
+ {
+ for (uint32_t x = 0; x < MAX_BLOCK_DIM; x++)
+ {
+ uint32_t k = 0;
+
+ // small block (width*height<31)
+ if ((x <= 6) && (y <= 5))
+ {
+ uint32_t v2 = compute_texel_partition(seed, x, y, 0, 2, true); assert(v2 <= 1);
+ uint32_t v3 = compute_texel_partition(seed, x, y, 0, 3, true); assert(v3 <= 2);
+ k |= v2 | (v3 << 2);
+ }
+
+ // not small block
+ {
+ uint32_t v2 = compute_texel_partition(seed, x, y, 0, 2, false); assert(v2 <= 1);
+ uint32_t v3 = compute_texel_partition(seed, x, y, 0, 3, false); assert(v3 <= 2);
+ k |= ((v2 | (v3 << 2)) << 4);
+ }
+
+ assert(k <= 255);
+
+ g_texel_partitions[seed][y][x] = (uint8_t)k;
+ } // x
+ } // y
+ } // seed
+ }
+
+ int get_precomputed_texel_partition(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t x, uint32_t y, uint32_t subsets) // fast table-lookup equivalent of compute_texel_partition()
+ {
+ assert(seed < NUM_PARTITION_PATTERNS);
+ assert((subsets >= 2) && (subsets <= 3));
+ assert((x < block_width) && (y < block_height));
+
+ const uint32_t v = g_texel_partitions[seed][y][x];
+
+ uint32_t shift = (subsets == 3) ? 2 : 0; // select the 2- vs 3-subset field
+ shift += ((block_width * block_height) >= 31) * 4; // large blocks live in the high nibble
+ uint32_t res = (v >> shift) & 3;
+
+ // sanity checking
+ assert(res == (uint32_t)compute_texel_partition(seed, x, y, 0, subsets, is_small_block(block_width, block_height)));
+
+ return res;
+ }
+
+ void precompute_texel_partitions() // lazy: skips the rebuild if the table already appears populated
+ {
+ if (!g_texel_partitions[0][0][0])
+ precompute_texel_partition();
+
+ sanity_check_texel_partition_tables();
+ }
+
+ void blue_contract(
+ int r, int g, int b, int a,
+ int &dr, int &dg, int &db, int &da) // averages r and g toward b; b and a pass through
+ {
+ dr = (r + b) >> 1;
+ dg = (g + b) >> 1;
+ db = b;
+ da = a;
+ }
+
+ inline void bit_transfer_signed(int& a, int& b) // a's bit 7 moves into b's top bit; a collapses to a signed value in [-32,31]
+ {
+ b >>= 1;
+ b |= (a & 0x80);
+ a >>= 1;
+ a &= 0x3F;
+ if ((a & 0x20) != 0) // sign-extend the 6-bit result
+ a -= 0x40;
+ }
+
+ static inline int clamp(int a, int l, int h)
+ {
+ if (a < l)
+ a = l;
+ else if (a > h)
+ a = h;
+ return a;
+ }
+
+ static inline float clampf(float a, float l, float h)
+ {
+ if (a < l)
+ a = l;
+ else if (a > h)
+ a = h;
+ return a;
+ }
+
+ inline int sign_extend(int src, int num_src_bits) // interprets the low num_src_bits of src as two's complement
+ {
+ assert((num_src_bits >= 2) && (num_src_bits <= 31));
+
+ const bool negative = (src & (1 << (num_src_bits - 1))) != 0;
+ if (negative)
+ return src | ~((1 << num_src_bits) - 1);
+ else
+ return src & ((1 << num_src_bits) - 1);
+ }
+
+ // endpoints is [4][2]
+ void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE) // unpacks raw CEM values pE[] into [4][2] RGBA lo/hi endpoints; LDR cases clamp to 8 bits, HDR to 12
+ {
+ assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
+
+ int v0 = pE[0], v1 = pE[1];
+
+ int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
+ int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
+
+ switch (cem_index)
+ {
+ case CEM_LDR_LUM_DIRECT:
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v0; e1_g = v1;
+ e0_b = v0; e1_b = v1;
+ e0_a = 0xFF; e1_a = 0xFF;
+ break;
+ }
+ case CEM_LDR_LUM_BASE_PLUS_OFS:
+ {
+ int l0 = (v0 >> 2) | (v1 & 0xc0);
+ int l1 = l0 + (v1 & 0x3f); // base + 6-bit offset, saturated below
+
+ if (l1 > 0xFF)
+ l1 = 0xFF;
+
+ e0_r = l0; e1_r = l1;
+ e0_g = l0; e1_g = l1;
+ e0_b = l0; e1_b = l1;
+ e0_a = 0xFF; e1_a = 0xFF;
+ break;
+ }
+ case CEM_LDR_LUM_ALPHA_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ e0_r = v0; e1_r = v1;
+ e0_g = v0; e1_g = v1;
+ e0_b = v0; e1_b = v1;
+ e0_a = v2; e1_a = v3;
+ break;
+ }
+ case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v0; e1_g = v0 + v1;
+ e0_b = v0; e1_b = v0 + v1;
+ e0_a = v2; e1_a = v2 + v3;
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_SCALE:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ e0_r = (v0 * v3) >> 8; e1_r = v0; // e0 = e1 scaled by v3/256
+ e0_g = (v1 * v3) >> 8; e1_g = v1;
+ e0_b = (v2 * v3) >> 8; e1_b = v2;
+ e0_a = 0xFF; e1_a = 0xFF;
+
+ break;
+ }
+ case CEM_LDR_RGB_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ if ((v1 + v3 + v5) >= (v0 + v2 + v4)) // endpoint order encodes whether blue-contraction applies
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v2; e1_g = v3;
+ e0_b = v4; e1_b = v5;
+ e0_a = 0xFF; e1_a = 0xFF;
+ }
+ else
+ {
+ blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_PLUS_OFFSET:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+ bit_transfer_signed(v5, v4);
+
+ if ((v1 + v3 + v5) >= 0)
+ {
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v2; e1_g = v2 + v3;
+ e0_b = v4; e1_b = v4 + v5;
+ e0_a = 0xFF; e1_a = 0xFF;
+ }
+ else
+ {
+ blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ e0_r = (v0 * v3) >> 8; e1_r = v0;
+ e0_g = (v1 * v3) >> 8; e1_g = v1;
+ e0_b = (v2 * v3) >> 8; e1_b = v2;
+ e0_a = v4; e1_a = v5;
+
+ break;
+ }
+ case CEM_LDR_RGBA_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
+
+ if ((v1 + v3 + v5) >= (v0 + v2 + v4))
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v2; e1_g = v3;
+ e0_b = v4; e1_b = v5;
+ e0_a = v6; e1_a = v7;
+ }
+ else
+ {
+ blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+ bit_transfer_signed(v5, v4);
+ bit_transfer_signed(v7, v6);
+
+ if ((v1 + v3 + v5) >= 0)
+ {
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v2; e1_g = v2 + v3;
+ e0_b = v4; e1_b = v4 + v5;
+ e0_a = v6; e1_a = v6 + v7;
+ }
+ else
+ {
+ blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_HDR_LUM_LARGE_RANGE:
+ {
+ int y0, y1;
+ if (v1 >= v0)
+ {
+ y0 = (v0 << 4);
+ y1 = (v1 << 4);
+ }
+ else
+ {
+ y0 = (v1 << 4) + 8;
+ y1 = (v0 << 4) - 8;
+ }
+
+ e0_r = y0; e1_r = y1;
+ e0_g = y0; e1_g = y1;
+ e0_b = y0; e1_b = y1;
+ e0_a = 0x780; e1_a = 0x780; // HDR opaque alpha sentinel used throughout the HDR cases
+
+ break;
+ }
+ case CEM_HDR_LUM_SMALL_RANGE:
+ {
+ int y0, y1, d;
+
+ if ((v0 & 0x80) != 0) // v0 bit 7 selects between two base/delta bit layouts
+ {
+ y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
+ d = (v1 & 0x1F) << 2;
+ }
+ else
+ {
+ y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
+ d = (v1 & 0x0F) << 1;
+ }
+
+ y1 = y0 + d;
+ if (y1 > 0xFFF)
+ y1 = 0xFFF;
+
+ e0_r = y0; e1_r = y1;
+ e0_g = y0; e1_g = y1;
+ e0_b = y0; e1_b = y1;
+ e0_a = 0x780; e1_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_RGB_BASE_SCALE:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
+
+ int majcomp, mode; // major component index and sub-mode, decoded from modeval
+ if ((modeval & 0xC) != 0xC)
+ {
+ majcomp = modeval >> 2;
+ mode = modeval & 3;
+ }
+ else if (modeval != 0xF)
+ {
+ majcomp = modeval & 3;
+ mode = 4;
+ }
+ else
+ {
+ majcomp = 0;
+ mode = 5;
+ }
+
+ int red = v0 & 0x3f;
+ int green = v1 & 0x1f;
+ int blue = v2 & 0x1f;
+ int scale = v3 & 0x1f;
+
+ int x0 = (v1 >> 6) & 1;
+ int x1 = (v1 >> 5) & 1;
+ int x2 = (v2 >> 6) & 1;
+ int x3 = (v2 >> 5) & 1;
+ int x4 = (v3 >> 7) & 1;
+ int x5 = (v3 >> 6) & 1;
+ int x6 = (v3 >> 5) & 1;
+
+ int ohm = 1 << mode; // mode mask drives the per-mode extra-bit routing below
+ if (ohm & 0x30) green |= x0 << 6;
+ if (ohm & 0x3A) green |= x1 << 5;
+ if (ohm & 0x30) blue |= x2 << 6;
+ if (ohm & 0x3A) blue |= x3 << 5;
+ if (ohm & 0x3D) scale |= x6 << 5;
+ if (ohm & 0x2D) scale |= x5 << 6;
+ if (ohm & 0x04) scale |= x4 << 7;
+ if (ohm & 0x3B) red |= x4 << 6;
+ if (ohm & 0x04) red |= x3 << 6;
+ if (ohm & 0x10) red |= x5 << 7;
+ if (ohm & 0x0F) red |= x2 << 7;
+ if (ohm & 0x05) red |= x1 << 8;
+ if (ohm & 0x0A) red |= x0 << 8;
+ if (ohm & 0x05) red |= x0 << 9;
+ if (ohm & 0x02) red |= x6 << 9;
+ if (ohm & 0x01) red |= x3 << 10;
+ if (ohm & 0x02) red |= x5 << 10;
+
+ static const int s_shamts[6] = { 1,1,2,3,4,5 };
+
+ const int shamt = s_shamts[mode];
+ red <<= shamt;
+ green <<= shamt;
+ blue <<= shamt;
+ scale <<= shamt;
+
+ if (mode != 5) // modes 0-4 store green/blue as offsets below red
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ if (majcomp == 1)
+ std::swap(red, green);
+
+ if (majcomp == 2)
+ std::swap(red, blue);
+
+ e1_r = clamp(red, 0, 0xFFF);
+ e1_g = clamp(green, 0, 0xFFF);
+ e1_b = clamp(blue, 0, 0xFFF);
+ e1_a = 0x780;
+
+ e0_r = clamp(red - scale, 0, 0xFFF);
+ e0_g = clamp(green - scale, 0, 0xFFF);
+ e0_b = clamp(blue - scale, 0, 0xFFF);
+ e0_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_RGB_HDR_ALPHA:
+ case CEM_HDR_RGB_LDR_ALPHA:
+ case CEM_HDR_RGB:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
+
+ e0_a = 0x780; // defaults; overridden below for the two alpha-carrying CEMs
+ e1_a = 0x780;
+
+ if (majcomp == 3) // direct (non-delta) layout
+ {
+ e0_r = v0 << 4;
+ e0_g = v2 << 4;
+ e0_b = (v4 & 0x7f) << 5;
+
+ e1_r = v1 << 4;
+ e1_g = v3 << 4;
+ e1_b = (v5 & 0x7f) << 5;
+ }
+ else
+ {
+ int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
+ int va = v0 | ((v1 & 0x40) << 2);
+ int vb0 = v2 & 0x3f;
+ int vb1 = v3 & 0x3f;
+ int vc = v1 & 0x3f;
+ int vd0 = v4 & 0x7f;
+ int vd1 = v5 & 0x7f;
+
+ static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
+ vd0 = sign_extend(vd0, s_dbitstab[mode]);
+ vd1 = sign_extend(vd1, s_dbitstab[mode]);
+
+ int x0 = (v2 >> 6) & 1;
+ int x1 = (v3 >> 6) & 1;
+ int x2 = (v4 >> 6) & 1;
+ int x3 = (v5 >> 6) & 1;
+ int x4 = (v4 >> 5) & 1;
+ int x5 = (v5 >> 5) & 1;
+
+ int ohm = 1 << mode;
+ if (ohm & 0xA4) va |= x0 << 9;
+ if (ohm & 0x08) va |= x2 << 9;
+ if (ohm & 0x50) va |= x4 << 9;
+ if (ohm & 0x50) va |= x5 << 10;
+ if (ohm & 0xA0) va |= x1 << 10;
+ if (ohm & 0xC0) va |= x2 << 11;
+ if (ohm & 0x04) vc |= x1 << 6;
+ if (ohm & 0xE8) vc |= x3 << 6;
+ if (ohm & 0x20) vc |= x2 << 7;
+ if (ohm & 0x5B) vb0 |= x0 << 6;
+ if (ohm & 0x5B) vb1 |= x1 << 6;
+ if (ohm & 0x12) vb0 |= x2 << 7;
+ if (ohm & 0x12) vb1 |= x3 << 7;
+
+ int shamt = (mode >> 1) ^ 3;
+ va = (uint32_t)va << shamt; // unsigned shifts avoid UB on negative deltas
+ vb0 = (uint32_t)vb0 << shamt;
+ vb1 = (uint32_t)vb1 << shamt;
+ vc = (uint32_t)vc << shamt;
+ vd0 = (uint32_t)vd0 << shamt;
+ vd1 = (uint32_t)vd1 << shamt;
+
+ e1_r = clamp(va, 0, 0xFFF);
+ e1_g = clamp(va - vb0, 0, 0xFFF);
+ e1_b = clamp(va - vb1, 0, 0xFFF);
+
+ e0_r = clamp(va - vc, 0, 0xFFF);
+ e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
+ e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
+
+ if (majcomp == 1)
+ {
+ std::swap(e0_r, e0_g);
+ std::swap(e1_r, e1_g);
+ }
+ else if (majcomp == 2)
+ {
+ std::swap(e0_r, e0_b);
+ std::swap(e1_r, e1_b);
+ }
+ }
+
+ if (cem_index == CEM_HDR_RGB_LDR_ALPHA) // alpha stored directly as 8-bit values
+ {
+ int v6 = pE[6], v7 = pE[7];
+
+ e0_a = v6;
+ e1_a = v7;
+ }
+ else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
+ {
+ int v6 = pE[6], v7 = pE[7];
+
+ // Extract mode bits
+ int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
+ v6 &= 0x7F;
+ v7 &= 0x7F;
+
+ if (mode == 3)
+ {
+ e0_a = v6 << 5;
+ e1_a = v7 << 5;
+ }
+ else
+ {
+ v6 |= (v7 << (mode + 1)) & 0x780;
+ v7 &= (0x3F >> mode);
+ v7 ^= (0x20 >> mode); // sign-extend v7 via xor/sub trick
+ v7 -= (0x20 >> mode);
+
+ //v6 <<= (4 - mode); // undefined behavior if neg
+ v6 = ((uint32_t)v6) << (4 - mode);
+
+ //v7 <<= (4 - mode); // undefined behavior if neg
+ v7 = ((uint32_t)v7) << (4 - mode);
+
+ v7 += v6;
+ v7 = clamp(v7, 0, 0xFFF);
+ e0_a = v6;
+ e1_a = v7;
+ }
+ }
+
+ break;
+ }
+ default:
+ {
+ assert(0);
+ for (uint32_t c = 0; c < 4; c++) // unknown CEM: zero all endpoints rather than leave them uninitialized
+ {
+ pEndpoints[c][0] = 0;
+ pEndpoints[c][1] = 0;
+ }
+ break;
+ }
+ }
+ }
+
+ static inline bool is_half_inf_or_nan(half_float v) // exponent field (bits 10-14) all ones => Inf or NaN
+ {
+ return get_bits(v, 10, 14) == 31;
+ }
+
+ // This float->half conversion matches how "F32TO16" works on Intel GPU's.
+ /* Scalar helpers for the ASTC decoder: float<->half conversion and RGB9E5 shared-exponent packing. toward_zero selects truncation (required by ASTC weight application) instead of round-to-nearest. */ half_float float_to_half(float val, bool toward_zero) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; + int s = flt_s, e = 0, m = 0; + + // inf/NaN + if (flt_e == 0xff) + { + e = 31; + if (flt_m != 0) // NaN + m = 1; + } + // not zero or denormal + else if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + { + if (toward_zero) + m = (int)truncf((1 << 24) * fabsf(fi.f)); + else + m = (int)lrintf((1 << 24) * fabsf(fi.f)); + } + else + { + e = new_exp + 15; + if (toward_zero) + m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13))); + else + m = (int)lrintf((float)flt_m * (1.0f / (float)(1 << 13))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((s >= 0) && (s <= 1)); + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + half_float result = (half_float)((s << 15) | (e << 10) | m); + return result; + } + + float half_to_float(half_float hval) + { + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized; normalize the mantissa (unsigned wraparound of e is intentional and cancels out below) + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; + } + + // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt + const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15,
RGB9E5_MAX_VALID_BIASED_EXP = 31; + const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS); + const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS); + const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1); + //const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP)); + const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS)); + + /* Unpacks a 32-bit shared-exponent RGB9E5 value (three 9-bit mantissas, one 5-bit biased exponent) into linear floats. */ void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b) + { + int x = packed & 511; + int y = (packed >> 9) & 511; + int z = (packed >> 18) & 511; + int w = (packed >> 27) & 31; + + const float scale = powf(2.0f, static_cast(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); + + r = x * scale; + g = y * scale; + b = z * scale; + } + + // floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases. + static inline int floor_log2(float x) + { + union float754 + { + unsigned int raw; + float value; + }; + + float754 f; + f.value = x; + // Extract float exponent + return ((f.raw >> 23) & 0xFF) - 127; + } + + static inline int maximumi(int a, int b) { return (a > b) ? a : b; } + static inline float maximumf(float a, float b) { return (a > b) ?
a : b; } + + /* Packs linear RGB floats into shared-exponent RGB9E5 per EXT_texture_shared_exponent. NOTE(review): MAX_RGB9E5 is referenced below while the local definition above is commented out — presumably declared earlier in this file; confirm. */ uint32_t pack_rgb9e5(float r, float g, float b) + { + r = clampf(r, 0.0f, MAX_RGB9E5); + g = clampf(g, 0.0f, MAX_RGB9E5); + b = clampf(b, 0.0f, MAX_RGB9E5); + + float maxrgb = maximumf(maximumf(r, g), b); + int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS; + assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP)); + + float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); + + int maxm = (int)floorf((maxrgb / denom) + 0.5f); + if (maxm == (MAX_RGB9E5_MANTISSA + 1)) + { + denom *= 2; + exp_shared += 1; + assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); + } + else + { + assert(maxm <= MAX_RGB9E5_MANTISSA); + } + + int rm = (int)floorf((r / denom) + 0.5f); + int gm = (int)floorf((g / denom) + 0.5f); + int bm = (int)floorf((b / denom) + 0.5f); + + assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA)); + assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA)); + assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA)); + + return rm | (gm << 9) | (bm << 18) | (exp_shared << 27); + } + + /* Count of leading zeros within a 17-bit value; returns 17 for zero. */ static inline int clz17(uint32_t x) + { + assert(x <= 0x1FFFF); + x &= 0x1FFFF; + + if (!x) + return 17; + + uint32_t n = 0; + while ((x & 0x10000) == 0) + { + x <<= 1u; + n++; + } + + return n; + } + + static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb) + { + int lz = clz17(Cr | Cg | Cb | 1); + if (Cr == 65535) { Cr = 65536; lz = 0; } + if (Cg == 65535) { Cg = 65536; lz = 0; } + if (Cb == 65535) { Cb = 65536; lz = 0; } + Cr <<= lz; Cg <<= lz; Cb <<= lz; + Cr = (Cr >> 8) & 0x1FF; + Cg = (Cg >> 8) & 0x1FF; + Cb = (Cb >> 8) & 0x1FF; + uint32_t exponent = 16 - lz; + uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr; + return texel; + } + + static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb) + { + if (Cr > 0x7c00) Cr = 0; else if (Cr == 0x7c00) Cr = 0x7bff; + if (Cg > 0x7c00) Cg = 0; else if (Cg == 0x7c00) Cg = 0x7bff; + if (Cb ==
0x7c00) Cb = 0x7bff; + int Re = (Cr >> 10) & 0x1F; + int Ge = (Cg >> 10) & 0x1F; + int Be = (Cb >> 10) & 0x1F; + int Rex = (Re == 0) ? 1 : Re; + int Gex = (Ge == 0) ? 1 : Ge; + int Bex = (Be == 0) ? 1 : Be; + int Xm = ((Cr | Cg | Cb) & 0x200) >> 9; + int Xe = Re | Ge | Be; + uint32_t rshift, gshift, bshift, expo; + + if (Xe == 0) + { + expo = rshift = gshift = bshift = Xm; + } + else if (Re >= Ge && Re >= Be) + { + expo = Rex + 1; + rshift = 2; + gshift = Rex - Gex + 2; + bshift = Rex - Bex + 2; + } + else if (Ge >= Be) + { + expo = Gex + 1; + rshift = Gex - Rex + 2; + gshift = 2; + bshift = Gex - Bex + 2; + } + else + { + expo = Bex + 1; + rshift = Bex - Rex + 2; + gshift = Bex - Gex + 2; + bshift = 2; + } + + int Rm = (Cr & 0x3FF) | (Re == 0 ? 0 : 0x400); + int Gm = (Cg & 0x3FF) | (Ge == 0 ? 0 : 0x400); + int Bm = (Cb & 0x3FF) | (Be == 0 ? 0 : 0x400); + Rm = (Rm >> rshift) & 0x1FF; + Gm = (Gm >> gshift) & 0x1FF; + Bm = (Bm >> bshift) & 0x1FF; + + uint32_t texel = (expo << 27) | (Bm << 18) | (Gm << 9) | (Rm << 0); + return texel; + } + + /* Fills a failed block with the standard error color: NaNs for HDR16 output, opaque magenta otherwise. */ static void write_error_block(void* pPixels, uint32_t num_blk_pixels, decode_mode dec_mode) + { + // Write block error color + if (dec_mode == cDecodeModeHDR16) + { + // NaN's + memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = purple_9e5; + } + else + { + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = 0xFFFF00FF; + } + } + + // Important: pPixels is either 32-bit/texel or 64-bit/texel.
+ /* Decodes one logical ASTC block into pPixels (8-bit RGBA, half-float RGBA, or packed RGB9E5, chosen by dec_mode). Returns false — after writing the error color — for any invalid block configuration. */ bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode) + { + assert(is_valid_block_size(blk_width, blk_height)); + + // Basic sanity checking + if (!log_blk.m_dual_plane) + { + assert(log_blk.m_color_component_selector == 0); + } + else + { + assert(log_blk.m_color_component_selector <= 3); + } + + assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); + if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()) + return false; + + const uint32_t num_blk_pixels = blk_width * blk_height; + + if (log_blk.m_error_flag) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + // Should this return false? It's not an invalid logical block config, though. + return false; + } + + // Handle solid color blocks + if (log_blk.m_solid_color_flag_ldr) + { + // LDR solid block + if (dec_mode == cDecodeModeHDR16) + { + // Convert LDR pixels to half-float + half_float h[4]; + for (uint32_t c = 0; c < 4; c++) + h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f)); + float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f)); + float b = (log_blk.m_solid_color[2] == 0xFFFF) ?
1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f)); + + const uint32_t packed = pack_rgb9e5(r, g, b); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = packed; + } + else + { + // Convert LDR pixels to 8-bits + for (uint32_t i = 0; i < num_blk_pixels; i++) + for (uint32_t c = 0; c < 4; c++) + ((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8); + } + + return true; + } + else if (log_blk.m_solid_color_flag_hdr) + { + // HDR solid block, decode mode must be half-float or RGB9E5 + if (dec_mode == cDecodeModeHDR16) + { + for (uint32_t i = 0; i < num_blk_pixels; i++) + memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + float r = half_to_float(log_blk.m_solid_color[0]); + float g = half_to_float(log_blk.m_solid_color[1]); + float b = half_to_float(log_blk.m_solid_color[2]); + + const uint32_t packed = pack_rgb9e5(r, g, b); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = packed; + } + else + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + return true; + } + + // Sanity check block's config + if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if ((log_blk.m_num_partitions < 1) ||
(log_blk.m_num_partitions > MAX_PARTITIONS)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + if (log_blk.m_color_component_selector > 3) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range); + const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range); + + bool is_ldr_endpoints[MAX_PARTITIONS]; + + // Check CEM's + uint32_t total_cem_vals = 0; + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + { + if (log_blk.m_color_endpoint_modes[i] > 15) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]); + + is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]); + } + + if (total_cem_vals > MAX_ENDPOINTS) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range); + const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data(); + + // Dequantized endpoints to [0,255] + uint8_t dequantized_endpoints[MAX_ENDPOINTS]; + for (uint32_t i = 0; i < total_cem_vals; i++) + { + if (log_blk.m_endpoints[i] >= total_endpoint_levels) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]]; + } + + // 
Dequantize weights to [0,64] + uint8_t dequantized_weights[2][12 * 12]; + + const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); + const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); + + const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height; + for (uint32_t i = 0; i < total_weight_vals; i++) + { + if (log_blk.m_weights[i] >= total_weight_levels) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); + return false; + } + + const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0; + const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i; + + dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]]; + } + + // Upsample weight grid. [0,64] weights + uint8_t upsampled_weights[2][12 * 12]; + + upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]); + if (log_blk.m_dual_plane) + upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]); + + // Decode CEM's + int endpoints[4][4][2]; // [subset][comp][l/h] + + uint32_t endpoint_val_index = 0; + for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++) + { + const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset]; + + decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]); + + endpoint_val_index += get_num_cem_values(cem_index); + } + + // Decode texels + const bool small_block = num_blk_pixels < 31; + const bool use_precomputed_texel_partitions = (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3); + const uint32_t ccs = log_blk.m_dual_plane ?
log_blk.m_color_component_selector : UINT32_MAX; + + bool success = true; + + if (dec_mode == cDecodeModeRGB9E5) + { + // returns uint32_t's + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + uint32_t subset = 0; + if (log_blk.m_num_partitions > 1) + { + if (use_precomputed_texel_partitions) + { + subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + //assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia + } + else + subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); + } + + int comp[3]; + + for (uint32_t c = 0; c < 3; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + if (is_ldr_endpoints[subset]) + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF)); + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + le = (le << 8) | le; + he = (he << 8) | he; + + int k = weight_interpolate(le, he, w); + assert((k >= 0) && (k <= 0xFFFF)); + + comp[c] = k; // 1.0 + } + else + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF)); + + int le = endpoints[subset][c][0] << 4; + int he = endpoints[subset][c][1] << 4; + + int qlog16 = weight_interpolate(le, he, w); + + comp[c] = qlog16_to_half(qlog16); + + if (is_half_inf_or_nan((half_float)comp[c])) + comp[c] = 0x7BFF; + } + + } // c + + uint32_t packed; + if (is_ldr_endpoints[subset]) + packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]); + else + packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]); + + ((uint32_t*)pPixels)[pixel_index] = packed; + + }
// x + } // y + } + else if (dec_mode == cDecodeModeHDR16) + { + // Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application) + + // returns half floats + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + uint32_t subset = 0; + if (log_blk.m_num_partitions > 1) + { + if (use_precomputed_texel_partitions) + { + subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + //assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia + } + else + subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); + } + + for (uint32_t c = 0; c < 4; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + half_float o; + + if ( (is_ldr_endpoints[subset]) || + ((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) ) + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF)); + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + le = (le << 8) | le; + he = (he << 8) | he; + + int k = weight_interpolate(le, he, w); + assert((k >= 0) && (k <= 0xFFFF)); + + if (k == 0xFFFF) + o = 0x3C00; // 1.0 + else + o = float_to_half((float)k * (1.0f / 65536.0f), true); + } + else + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF)); + + int le = endpoints[subset][c][0] << 4; + int he = endpoints[subset][c][1] << 4; + + int qlog16 = weight_interpolate(le, he, w); + + o = qlog16_to_half(qlog16); + + if (is_half_inf_or_nan(o)) + o = 0x7BFF; + } + + ((half_float*)pPixels)[pixel_index * 4 + c] = o;
+ } + + } // x + } // y + } + else + { + // returns uint8_t's + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + uint32_t subset = 0; + if (log_blk.m_num_partitions > 1) + { + if (use_precomputed_texel_partitions) + { + subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + //assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia + } + else + subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); + } + + if (!is_ldr_endpoints[subset]) + { + ((uint32_t*)pPixels)[pixel_index] = 0xFFFF00FF; + success = false; + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + // FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder + //if ((dec_mode == cDecodeModeSRGB8) && (c <= 2)) + // See https://github.com/ARM-software/astc-encoder/issues/447 + // See latest spec with recent (2023-2024) fixes: + // https://raw.githubusercontent.com/KhronosGroup/DataFormat/refs/heads/main/astc.txt + // "For _LDR endpoint modes_, each color component C is calculated from the corresponding 8 - bit endpoint components C~0~and C~1~as follows" - does this mean alpha too? I guess so. (8/15/2025.) + if (dec_mode == cDecodeModeSRGB8) + { + le = (le << 8) | 0x80; + he = (he << 8) | 0x80; + } + else + { + le = (le << 8) | le; + he = (he << 8) | he; + } + + uint32_t k = weight_interpolate(le, he, w); + + // FIXME (old comment - before 2023/2024 ARM etc. spec fixes): This is what the spec says to do in LDR mode, but this is not what ARM's decoder does + // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.
+ // It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit. + ((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8); + } + } + + } // x + } // y + } + + return success; + } + + /* Returns true if the logical block stays within the LDR feature subset handled by the fast decode_block_xuastc_ldr() path. */ bool is_block_xuastc_ldr(const log_astc_block& log_blk) + { + if (log_blk.m_error_flag) + return false; + + if (log_blk.m_solid_color_flag_ldr) + return true; + + if (log_blk.m_solid_color_flag_hdr) + return false; + + if (log_blk.m_num_partitions > 3) + return false; + + if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > 1)) + return false; + + // TODO: Check partition pattern ID against unique set. + + for (uint32_t i = 1; i < log_blk.m_num_partitions; i++) + if (log_blk.m_color_endpoint_modes[0] != log_blk.m_color_endpoint_modes[i]) + return false; + + switch (log_blk.m_color_endpoint_modes[0]) + { + case CEM_LDR_LUM_DIRECT: + case CEM_LDR_LUM_ALPHA_DIRECT: + case CEM_LDR_RGB_BASE_SCALE: + case CEM_LDR_RGB_DIRECT: + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + case CEM_LDR_RGBA_DIRECT: + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + { + break; + } + default: + { + return false; + } + } + + return true; + } + + // ~2x faster than decode_block(), but XUASTC LDR only.
+ // pUpsampled_weights_to_use must be at block res, [0,64], single plane blocks ONLY + bool decode_block_xuastc_ldr(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode, + const uint8_t* pUpsampled_weights_to_use, uint32_t start_x, uint32_t start_y, uint32_t end_x, uint32_t end_y) + { + if (!end_x) + end_x = blk_width; + + if (!end_y) + end_y = blk_height; + + assert(start_x < end_x); + assert(start_y < end_y); + assert(end_x <= blk_width); + assert(end_y <= blk_height); + + assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); + assert((dec_mode == cDecodeModeSRGB8) || (dec_mode == cDecodeModeLDR8)); + assert(is_valid_block_size(blk_width, blk_height)); + assert(!log_blk.m_error_flag && !log_blk.m_solid_color_flag_hdr); + + if (!log_blk.m_solid_color_flag_ldr) + { + assert(((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= 3))); + assert((log_blk.m_grid_width >= 2) & (log_blk.m_grid_height >= 2)); + assert((log_blk.m_grid_width <= blk_width) && (log_blk.m_grid_height <= blk_height)); + assert((log_blk.m_grid_width * log_blk.m_grid_height) <= MAX_GRID_WEIGHTS); + assert((log_blk.m_num_partitions > 1) || (log_blk.m_partition_id == 0)); + } + + assert(is_block_xuastc_ldr(log_blk)); + + const uint32_t num_blk_pixels = blk_width * blk_height; + + // Handle solid color blocks + if (log_blk.m_solid_color_flag_ldr) + { + // Convert LDR pixels to 8-bits + uint32_t x; + + ((uint8_t*)&x)[0] = (uint8_t)(log_blk.m_solid_color[0] >> 8); + ((uint8_t*)&x)[1] = (uint8_t)(log_blk.m_solid_color[1] >> 8); + ((uint8_t*)&x)[2] = (uint8_t)(log_blk.m_solid_color[2] >> 8); + ((uint8_t*)&x)[3] = (uint8_t)(log_blk.m_solid_color[3] >> 8); + + uint32_t* pDst = (uint32_t*)pPixels; + + uint32_t i = 0; + while ((i + 3) < num_blk_pixels) + { + pDst[i] = x; + pDst[i + 1] = x; + pDst[i + 2] = x; + pDst[i + 3] = x; + + i += 4; + } + + while (i < num_blk_pixels) + pDst[i++] = x; + + return true; + } + + const dequant_table& 
endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range); + const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data(); + + const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); + const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); + + // Check CEM's + const uint32_t num_cem_vals = get_num_cem_values(log_blk.m_color_endpoint_modes[0]); + const uint32_t total_cem_vals = num_cem_vals * log_blk.m_num_partitions; + + assert(total_cem_vals <= MAX_ENDPOINTS); + + // Dequantized endpoints to [0,255] + uint8_t dequantized_endpoints[MAX_ENDPOINTS]; + + for (uint32_t i = 0; i < total_cem_vals; i++) + { + assert(log_blk.m_endpoints[i] < endpoint_dequant_tab.m_ISE_to_val.size_u32()); + dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]]; + } + + // Decode CEM's + int endpoints[4][4][2]; // [subset][comp][l/h] + + uint32_t endpoint_val_index = 0; + const uint32_t cem_index = log_blk.m_color_endpoint_modes[0]; + + uint32_t alpha_mask = 0xFF; + + for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++) + { + assert(log_blk.m_color_endpoint_modes[subset] == cem_index); + + decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]); + + alpha_mask &= endpoints[subset][3][0]; + alpha_mask &= endpoints[subset][3][1]; + + endpoint_val_index += num_cem_vals; + } + + const bool any_alpha = alpha_mask != 255; + + // Dequantize weights to [0,64] + uint8_t upsampled_weights[2][12 * 12]; + + const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height; + + // Upsample weight grid. 
[0,64] weights + const uint8_t(*pUpsampled_weights)[12 * 12]; + + uint8_t dequantized_weights[2][12 * 12]; + + // For simplicity, ignore any passed in weights if dual plane + if ((pUpsampled_weights_to_use) && (!log_blk.m_dual_plane)) + { + // Caller is jamming in already unpacked weights for the first plane to save time + pUpsampled_weights = reinterpret_cast(pUpsampled_weights_to_use); + } + else + { + if (log_blk.m_dual_plane) + { + for (uint32_t i = 0; i < total_weight_vals; i++) + { + const uint32_t plane_index = i & 1; + const uint32_t grid_index = i >> 1; + + assert(log_blk.m_weights[i] < weight_dequant_tab.m_ISE_to_val.size_u32()); + dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]]; + } + } + else + { + for (uint32_t i = 0; i < total_weight_vals; i++) + { + assert(log_blk.m_weights[i] < weight_dequant_tab.m_ISE_to_val.size_u32()); + dequantized_weights[0][i] = pWeight_dequant[log_blk.m_weights[i]]; + } + } + + pUpsampled_weights = &dequantized_weights[0]; + + if ((log_blk.m_grid_width < blk_width) || (log_blk.m_grid_height < blk_height)) + { + upsample_weight_grid_xuastc_ldr(blk_width, blk_height, + log_blk.m_grid_width, log_blk.m_grid_height, + &dequantized_weights[0][0], &upsampled_weights[0][0], + log_blk.m_dual_plane ? &dequantized_weights[1][0] : nullptr, log_blk.m_dual_plane ? &upsampled_weights[1][0] : nullptr); + + pUpsampled_weights = &upsampled_weights[0]; + } + } + + // Decode texels + const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX; + + const uint8_t *pPart = &g_texel_partitions[log_blk.m_partition_id][0][0]; // [seed][y][x] + + const bool large_block = (num_blk_pixels >= 31); + uint32_t part_shift = (log_blk.m_num_partitions == 3) ? 
2 : 0; + part_shift += large_block * 4; + + //uint32_t pixel_index = 0; + + if (log_blk.m_num_partitions == 1) + { + // alpha, 1 subset + int le0 = endpoints[0][0][0], he0 = endpoints[0][0][1]; + int le1 = endpoints[0][1][0], he1 = endpoints[0][1][1]; + int le2 = endpoints[0][2][0], he2 = endpoints[0][2][1]; + int le3 = endpoints[0][3][0], he3 = endpoints[0][3][1]; + + if (dec_mode == cDecodeModeSRGB8) + { + le0 = (le0 << 8) | 0x80; he0 = (he0 << 8) | 0x80; + le1 = (le1 << 8) | 0x80; he1 = (he1 << 8) | 0x80; + le2 = (le2 << 8) | 0x80; he2 = (he2 << 8) | 0x80; + le3 = (le3 << 8) | 0x80; he3 = (he3 << 8) | 0x80; + } + else + { + le0 = (le0 << 8) | le0; he0 = (he0 << 8) | he0; + le1 = (le1 << 8) | le1; he1 = (he1 << 8) | he1; + le2 = (le2 << 8) | le2; he2 = (he2 << 8) | he2; + le3 = (le3 << 8) | le3; he3 = (he3 << 8) | he3; + } + + // no subsets + if (!any_alpha) + { + if (!log_blk.m_dual_plane) + { + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t w0 = pUpsampled_weights[0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[0][pixel_index]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255; + } // x + } // y + } + else + { + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 
1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255; + } // x + } // y + } + } + else // (!any_alpha) + { + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w3 = pUpsampled_weights[(3 == ccs) ? 1 : 0][pixel_index]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + const uint32_t k3 = weight_interpolate(le3, he3, w3); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = (uint8_t)(k3 >> 8); + + } // x + } // y + } + } + else + { + for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++) + { + int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1]; + int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1]; + int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1]; + int le3 = endpoints[subset][3][0], he3 = endpoints[subset][3][1]; + + if (dec_mode == cDecodeModeSRGB8) + { + le0 = (le0 << 8) | 0x80; he0 = (he0 << 8) | 0x80; + le1 = (le1 
<< 8) | 0x80; he1 = (he1 << 8) | 0x80; + le2 = (le2 << 8) | 0x80; he2 = (he2 << 8) | 0x80; + le3 = (le3 << 8) | 0x80; he3 = (he3 << 8) | 0x80; + } + else + { + le0 = (le0 << 8) | le0; he0 = (he0 << 8) | he0; + le1 = (le1 << 8) | le1; he1 = (he1 << 8) | he1; + le2 = (le2 << 8) | le2; he2 = (he2 << 8) | he2; + le3 = (le3 << 8) | le3; he3 = (he3 << 8) | he3; + } + + endpoints[subset][0][0] = le0, endpoints[subset][0][1] = he0; + endpoints[subset][1][0] = le1, endpoints[subset][1][1] = he1; + endpoints[subset][2][0] = le2, endpoints[subset][2][1] = he2; + endpoints[subset][3][0] = le3, endpoints[subset][3][1] = he3; + } + + // subsets + if (!any_alpha) + { + // no alpha, sRGB + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t v = pPart[y * 12 + x]; + const uint32_t subset = (v >> part_shift) & 3; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 
1 : 0][pixel_index]; + + int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1]; + int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1]; + int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255; + } // x + } // y + } + else + { + // alpha + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t v = pPart[y * 12 + x]; + const uint32_t subset = (v >> part_shift) & 3; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w3 = pUpsampled_weights[(3 == ccs) ? 
1 : 0][pixel_index]; + + int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1]; + int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1]; + int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1]; + int le3 = endpoints[subset][3][0], he3 = endpoints[subset][3][1]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + const uint32_t k3 = weight_interpolate(le3, he3, w3); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = (uint8_t)(k3 >> 8); + + } // x + } // y + + } + + } // if (log_blk.m_num_partitions == 1) + + return true; + } + + //------------------------------------------------ + // Physical to logical block decoding + + // unsigned 128-bit int, with some signed helpers + class uint128 + { + uint64_t m_lo, m_hi; + + public: + uint128() = default; + inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { } + inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { } + inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { } + + inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? 
UINT64_MAX : 0; return *this; } + inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; } + + inline explicit operator uint8_t () const { return (uint8_t)m_lo; } + inline explicit operator uint16_t () const { return (uint16_t)m_lo; } + inline explicit operator uint32_t () const { return (uint32_t)m_lo; } + inline explicit operator uint64_t () const { return m_lo; } + + inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; } + inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; } + + inline uint64_t get_low() const { return m_lo; } + inline uint64_t& get_low() { return m_lo; } + + inline uint64_t get_high() const { return m_hi; } + inline uint64_t& get_high() { return m_hi; } + + inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); } + inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); } + + inline bool operator< (const uint128& rhs) const + { + if (m_hi < rhs.m_hi) + return true; + + if (m_hi == rhs.m_hi) + { + if (m_lo < rhs.m_lo) + return true; + } + + return false; + } + + inline bool operator> (const uint128& rhs) const { return (rhs < *this); } + + inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); } + inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); } + + inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); } + inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); } + inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); } + inline explicit operator bool() const { return is_non_zero(); } + inline bool is_signed() const { return ((int64_t)m_hi) < 0; } + + inline bool signed_less(const uint128& rhs) const + { + const bool l_signed = is_signed(), r_signed = rhs.is_signed(); + + if (l_signed == r_signed) + return *this < rhs; + + if 
(l_signed && !r_signed) + return true; + + assert(!l_signed && r_signed); + return false; + } + + inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); } + inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); } + inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); } + + double get_double() const + { + double res = 0; + + if (m_hi) + res = (double)m_hi * pow(2.0f, 64.0f); + + res += (double)m_lo; + + return res; + } + + double get_signed_double() const + { + if (is_signed()) + return -(uint128(*this).abs().get_double()); + else + return get_double(); + } + + inline uint128 abs() const + { + uint128 res(*this); + if (res.is_signed()) + res = -res; + return res; + } + + inline uint128& operator<<= (int shift) + { + assert(shift >= 0); + if (shift < 0) + return *this; + + m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift); + + if ((shift) && (shift < 64)) + m_hi |= (m_lo >> (64 - shift)); + + m_lo = (shift >= 64) ? 0 : (m_lo << shift); + + return *this; + } + + inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; } + + inline uint128& operator>>= (int shift) + { + assert(shift >= 0); + if (shift < 0) + return *this; + + m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift); + + if ((shift) && (shift < 64)) + m_lo |= (m_hi << (64 - shift)); + + m_hi = (shift >= 64) ? 
0 : (m_hi >> shift); + + return *this; + } + + inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; } + + inline uint128 signed_shift_right(int shift) const + { + uint128 res(*this); + res >>= shift; + + if (is_signed()) + { + uint128 x(0U); + x = ~x; + x >>= shift; + res |= (~x); + } + + return res; + } + + inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; } + inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; } + + inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; } + inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; } + + inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; } + inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; } + + inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); } + + inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; } + + // prefix + inline uint128 operator ++() + { + if (++m_lo == 0) + ++m_hi; + return *this; + } + + // postfix + inline uint128 operator ++(int) + { + uint128 res(*this); + if (++m_lo == 0) + ++m_hi; + return res; + } + + // prefix + inline uint128 operator --() + { + const uint64_t t = m_lo; + if (--m_lo > t) + --m_hi; + return *this; + } + + // postfix + inline uint128 operator --(int) + { + const uint64_t t = m_lo; + uint128 res(*this); + if (--m_lo > t) + --m_hi; + return res; + } + + inline uint128& operator+= (const uint128& rhs) + { + const uint64_t t = m_lo + rhs.m_lo; + m_hi = m_hi + rhs.m_hi + (t < m_lo); + m_lo = t; + return *this; + } + + inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; } + + inline uint128& operator-= (const uint128& rhs) + { + const uint64_t t = m_lo 
- rhs.m_lo; + m_hi = m_hi - rhs.m_hi - (t > m_lo); + m_lo = t; + return *this; + } + + inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; } + + // computes bit by bit, very slow + uint128& operator*=(const uint128& rhs) + { + uint128 temp(*this), result(0U); + + for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1) + if (bitmask.get_low() & 1) + result += temp; + + *this = result; + return *this; + } + + uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; } + + // computes bit by bit, very slow + friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder) + { + remainder = 0; + + if (!divisor) + { + assert(0); + return ~uint128(0U); + } + + uint128 quotient(0), one(1); + + for (int i = 127; i >= 0; i--) + { + remainder = (remainder << 1) | ((dividend >> i) & one); + if (remainder >= divisor) + { + remainder -= divisor; + quotient |= (one << i); + } + } + + return quotient; + } + + uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; } + uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; } + + uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; } + uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; } + + void print_hex(FILE* pFile) const + { + fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo); + } + + void format_unsigned(std::string& res) const + { + basisu::vector digits; + digits.reserve(39 + 1); + + uint128 k(*this), ten(10); + do + { + uint128 r; + k = divide(k, ten, r); + digits.push_back((uint8_t)r); + } while (k); + + for (int i = (int)digits.size() - 1; i >= 0; i--) + res += ('0' + digits[i]); + } + + void format_signed(std::string& 
res) const + { + uint128 val(*this); + + if (val.is_signed()) + { + res.push_back('-'); + val = -val; + } + + val.format_unsigned(res); + } + + void print_unsigned(FILE* pFile) + { + std::string str; + format_unsigned(str); + fprintf(pFile, "%s", str.c_str()); + } + + void print_signed(FILE* pFile) + { + std::string str; + format_signed(str); + fprintf(pFile, "%s", str.c_str()); + } + + uint128 get_reversed_bits() const + { + uint128 res; + + const uint32_t* pSrc = (const uint32_t*)this; + uint32_t* pDst = (uint32_t*)&res; + + pDst[0] = rev_dword(pSrc[3]); + pDst[1] = rev_dword(pSrc[2]); + pDst[2] = rev_dword(pSrc[1]); + pDst[3] = rev_dword(pSrc[0]); + + return res; + } + + uint128 get_byteswapped() const + { + uint128 res; + + const uint8_t* pSrc = (const uint8_t*)this; + uint8_t* pDst = (uint8_t*)&res; + + for (uint32_t i = 0; i < 16; i++) + pDst[i] = pSrc[15 - i]; + + return res; + } + + inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const + { + assert(bit_ofs < 128); + assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128)); + + uint128 res(*this); + res >>= bit_ofs; + + const uint64_t bitmask = (bit_len == 64) ? 
UINT64_MAX : ((1ull << bit_len) - 1); + return res.get_low() & bitmask; + } + + inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const + { + assert(bit_len <= 32); + return (uint32_t)get_bits64(bit_ofs, bit_len); + } + + inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const + { + assert(len && (len <= 32)); + uint32_t x = get_bits(bit_ofs, len); + bit_ofs += len; + return x; + } + + inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits) + { + assert(bit_ofs < 128); + assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128)); + + uint128 bitmask(1); + bitmask = (bitmask << num_bits) - 1; + assert(uint128(val) <= bitmask); + + bitmask <<= bit_ofs; + *this &= ~bitmask; + + *this = *this | (uint128(val) << bit_ofs); + return *this; + } + }; + + static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk) + { + if (bits.get_bits(10, 2) != 0b11) + return false; + + uint32_t bit_ofs = 12; + const uint32_t min_s = bits.next_bits(bit_ofs, 13); + const uint32_t max_s = bits.next_bits(bit_ofs, 13); + const uint32_t min_t = bits.next_bits(bit_ofs, 13); + const uint32_t max_t = bits.next_bits(bit_ofs, 13); + assert(bit_ofs == 64); + + const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF); + + if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t))) + return false; + + const bool hdr_flag = bits.get_bits(9, 1) != 0; + + if (hdr_flag) + log_blk.m_solid_color_flag_hdr = true; + else + log_blk.m_solid_color_flag_ldr = true; + + log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16); + log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16); + log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16); + log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16); + + if (log_blk.m_solid_color_flag_hdr) + { + for (uint32_t c = 0; c < 4; c++) + if (is_half_inf_or_nan(log_blk.m_solid_color[c])) + return false; + } + + return true; + } 
+ + struct astc_dec_row + { + int8_t Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs; + }; + + static const astc_dec_row s_dec_rows[10] = + { + // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs; + { 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 2 + { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2 + { 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 8 + { 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // 2 6 + + { 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // 2 2 + { 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // 12 2 + { 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // 2 12 + { 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // 6 10 + + { 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // 10 6 + { -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // 6 6 + }; + + static bool decode_config(const uint128& bits, log_astc_block& log_blk) + { + // Reserved + if (bits.get_bits(0, 4) == 0) + return false; + + // Reserved + if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111)) + { + if (bits.get_bits(2, 4) != 0b1111) + return false; + } + + // Void extent + if (bits.get_bits(0, 9) == 0b111111100) + return decode_void_extent(bits, log_blk); + + // Check rows + const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2); + const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1); + const uint32_t x7_2 = bits.get_bits(7, 2); + + int row_index = -1; + if (x0_2 == 0) + { + if (x7_2 == 0b00) + row_index = 5; + else if (x7_2 == 0b01) + row_index = 6; + else if (x5_4 == 0b1100) + row_index = 7; + else if (x5_4 == 0b1101) + row_index = 8; + else if (x7_2 == 0b10) + row_index = 9; + } + else + { + if (x2_2 == 0b00) + row_index = 0; + else if (x2_2 == 0b01) + row_index = 1; + else if (x2_2 == 0b10) + row_index = 2; + else if ((x2_2 == 0b11) && (x8_1 == 0)) + row_index = 3; + else if ((x2_2 == 0b11) && (x8_1 == 1)) + row_index = 4; + } + if (row_index < 0) + return false; + + const astc_dec_row& r = s_dec_rows[row_index]; + + bool P = false, Dp = false; + uint32_t 
W = r.W_bias, H = r.H_bias; + + if (r.P_ofs >= 0) + P = bits.get_bits(r.P_ofs, 1) != 0; + + if (r.Dp_ofs >= 0) + Dp = bits.get_bits(r.Dp_ofs, 1) != 0; + + if (r.W_size) + W += bits.get_bits(r.W_ofs, r.W_size); + + if (r.H_size) + H += bits.get_bits(r.H_ofs, r.H_size); + + assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM)); + assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM)); + + int p0 = bits.get_bits(r.p0_ofs, 1); + int p1 = bits.get_bits(r.p1_ofs, 1); + int p2 = bits.get_bits(r.p2_ofs, 1); + + uint32_t p = p0 | (p1 << 1) | (p2 << 2); + if (p < 2) + return false; + + log_blk.m_grid_width = (uint8_t)W; + log_blk.m_grid_height = (uint8_t)H; + + log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS)); + assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); + + log_blk.m_dual_plane = Dp; + + return true; + } + + static inline uint32_t read_le_dword(const uint8_t* pBytes) + { + return (pBytes[0]) | (pBytes[1] << 8U) | (pBytes[2] << 16U) | (pBytes[3] << 24U); + } + + // See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs. 
+ static const uint8_t s_trit_decode[256][5] = + { + {0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0}, + {0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0}, + {0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0}, + {0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2}, + {0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0}, + {0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0}, + {0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0}, + {0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2}, + {0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0}, + {0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0}, + {0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0}, + {0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2}, + {0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2}, + {0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2}, + {0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2}, + {0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2}, + {0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1}, + {0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1}, + {0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1}, + 
{0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2}, + {0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1}, + {0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1}, + {0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1}, + {0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2}, + {0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1}, + {0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1}, + {0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1}, + {0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2}, + {0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2}, + {0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2}, + {0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2}, + {0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2} + }; + + static const uint8_t s_quint_decode[128][3] = + { + {0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4}, + {0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4}, + {0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4}, + {0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4}, + {0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4}, + {0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4}, + {0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4}, + {0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4}, + {0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4}, + 
{0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4}, + {0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4}, + {0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4}, + {0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4}, + {0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4}, + {0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4}, + {0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4} + }; + + static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val) + { + assert((num_vals >= 1) && (num_vals <= 5)); + uint32_t m[5] = { 0 }, T = 0; + + static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 }; + + for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++) + { + if (bits_per_val) + m[c] = bits.next_bits(bit_ofs, bits_per_val); + T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs); + T_ofs += s_t_bits[c]; + } + + const uint8_t (&p_trits)[5] = s_trit_decode[T]; + + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]); + } + + static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val) + { + assert((num_vals >= 1) && (num_vals <= 3)); + uint32_t m[3] = { 0 }, T = 0; + + static const uint8_t s_t_bits[3] = { 3, 2, 2 }; + + for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++) + { + if (bits_per_val) + m[c] = bits.next_bits(bit_ofs, bits_per_val); + T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs); + T_ofs += s_t_bits[c]; + } + + const uint8_t (&p_quints)[3] = s_quint_decode[T]; + + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]); + } + + static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs) + { + assert(num_vals && (ise_range < TOTAL_ISE_RANGES)); + + const uint32_t bits_per_val = 
g_ise_range_table[ise_range][0]; + + if (g_ise_range_table[ise_range][1]) + { + // Trits+bits, 5 vals per block, 7 bits extra per block + const uint32_t total_blocks = (num_vals + 4) / 5; + for (uint32_t b = 0; b < total_blocks; b++) + { + const uint32_t num_vals_in_block = std::min(num_vals - 5 * b, 5); + decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val); + } + } + else if (g_ise_range_table[ise_range][2]) + { + // Quints+bits, 3 vals per block, 8 bits extra per block + const uint32_t total_blocks = (num_vals + 2) / 3; + for (uint32_t b = 0; b < total_blocks; b++) + { + const uint32_t num_vals_in_block = std::min(num_vals - 3 * b, 3); + decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val); + } + } + else + { + assert(bits_per_val); + + // Only bits + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val); + } + } + + void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs) + { + const uint128 bits( + (uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32), + (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32)); + + return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs); + } + + // Decodes a physical ASTC block to a logical ASTC block. + // blk_width/blk_height are only used to validate the weight grid's dimensions. 
+ bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height) + { + assert(is_valid_block_size(blk_width, blk_height)); + + const uint8_t* pS = (uint8_t*)pASTC_block; + + log_blk.clear(); + log_blk.m_error_flag = true; + + const uint128 bits( + (uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32), + (uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32)); + + const uint128 rev_bits(bits.get_reversed_bits()); + + if (!decode_config(bits, log_blk)) + return false; + + if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr) + { + // Void extent + log_blk.m_error_flag = false; + return true; + } + + // Check grid dimensions + if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) + return false; + + // Now we have the grid width/height, dual plane, weight ISE range + + const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height); + const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range); + + // 18.24 Illegal Encodings + if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) + return false; + + const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits; + + uint32_t total_extra_bits = 0; + + // Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane. 
+ + log_blk.m_num_partitions = (uint8_t)(bits.get_bits(11, 2) + 1); + if (log_blk.m_num_partitions == 1) + log_blk.m_color_endpoint_modes[0] = (uint8_t)(bits.get_bits(13, 4)); // read CEM bits + else + { + // 2 or more partitions + if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4)) + return false; + + log_blk.m_partition_id = (uint16_t)bits.get_bits(13, 10); + + uint32_t cem_bits = bits.get_bits(23, 6); + + if ((cem_bits & 3) == 0) + { + // All CEM's the same + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + log_blk.m_color_endpoint_modes[i] = (uint8_t)(cem_bits >> 2); + } + else + { + // CEM's different, but within up to 2 adjacent classes + const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4; + + total_extra_bits = 3 * log_blk.m_num_partitions - 4; + + if ((total_weight_bits + total_extra_bits) > 128) + return false; + + uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits; + + uint32_t c[4] = { 0 }, m[4] = { 0 }; + + cem_bits >>= 2; + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1) + c[i] = cem_bits & 1; + + switch (log_blk.m_num_partitions) + { + case 2: + { + m[0] = cem_bits & 3; + m[1] = bits.next_bits(cem_bit_pos, 2); + break; + } + case 3: + { + m[0] = cem_bits & 1; + m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1); + m[1] = bits.next_bits(cem_bit_pos, 2); + m[2] = bits.next_bits(cem_bit_pos, 2); + break; + } + case 4: + { + for (uint32_t i = 0; i < 4; i++) + m[i] = bits.next_bits(cem_bit_pos, 2); + break; + } + default: + { + assert(0); + break; + } + } + + assert(cem_bit_pos == end_of_weight_bit_ofs); + + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + { + log_blk.m_color_endpoint_modes[i] = (uint8_t)(first_cem_index + (c[i] * 4) + m[i]); + assert(log_blk.m_color_endpoint_modes[i] <= 15); + } + } + } + + // Now we have all the CEM indices. 
+ + if (log_blk.m_dual_plane) + { + // Read CCS bits, beneath any CEM bits + total_extra_bits += 2; + + if (total_extra_bits > end_of_weight_bit_ofs) + return false; + + uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits; + log_blk.m_color_component_selector = (uint8_t)(bits.get_bits(ccs_bit_pos, 2)); + } + + uint32_t config_bit_pos = 11 + 2; // config+num_parts + if (log_blk.m_num_partitions == 1) + config_bit_pos += 4; // CEM bits + else + config_bit_pos += 10 + 6; // part_id+CEM bits + + // config+num_parts+total_extra_bits (CEM extra+CCS) + uint32_t total_config_bits = config_bit_pos + total_extra_bits; + + // Compute number of remaining bits in block + const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; + if (num_remaining_bits < 0) + return false; + + // Compute total number of ISE encoded color endpoint mode values + uint32_t total_cem_vals = 0; + for (uint32_t j = 0; j < log_blk.m_num_partitions; j++) + total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]); + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + // Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block + // TODO: Optimize + int endpoint_ise_range = -1; + for (int k = 20; k > 0; k--) + { + int b = get_ise_sequence_bits(total_cem_vals, k); + if (b <= num_remaining_bits) + { + endpoint_ise_range = k; + break; + } + } + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE) + return false; + + log_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range; + + // Decode endpoints forwards in block + decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos); + + // Decode grid weights backwards in block + decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0); + + log_blk.m_error_flag = false; + + return 
true; + } + + // Misc. helpers + + uint8_t get_weight(const log_astc_block& log_block, uint32_t plane_index, uint32_t i) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + assert(i < (uint32_t)(log_block.m_grid_width * log_block.m_grid_height)); + + const uint32_t idx = i * num_planes + plane_index; + assert(idx < MAX_GRID_WEIGHTS); + + return log_block.m_weights[idx]; + } + + uint8_t &get_weight(log_astc_block& log_block, uint32_t plane_index, uint32_t i) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + assert(i < (uint32_t)(log_block.m_grid_width * log_block.m_grid_height)); + + const uint32_t idx = i * num_planes + plane_index; + assert(idx < MAX_GRID_WEIGHTS); + + return log_block.m_weights[idx]; + } + + void extract_weights(const log_astc_block& log_block, uint8_t* pWeights, uint32_t plane_index) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + + const uint32_t num_weights = log_block.m_grid_width * log_block.m_grid_height; + for (uint32_t i = 0; i < num_weights; i++) + pWeights[i] = log_block.m_weights[i * num_planes + plane_index]; + } + + void set_weights(log_astc_block& log_block, const uint8_t* pWeights, uint32_t plane_index) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + + const uint32_t num_weights = log_block.m_grid_width * log_block.m_grid_height; + for (uint32_t i = 0; i < num_weights; i++) + log_block.m_weights[i * num_planes + plane_index] = pWeights[i]; + } + + uint32_t get_total_weights(const log_astc_block& log_block) + { + return (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height); + } + + // Returns a pointer to the beginning of a partition's/subset's endpoint values. 
+ uint8_t *get_endpoints(log_astc_block& log_block, uint32_t partition_index) + { + assert(partition_index < log_block.m_num_partitions); + + uint32_t ofs = 0; + + for (uint32_t i = 0; i != partition_index; ++i) + ofs += get_num_cem_values(log_block.m_color_endpoint_modes[i]); + + assert(ofs < MAX_ENDPOINTS); + + return log_block.m_endpoints + ofs; + } + + const uint8_t* get_endpoints(const log_astc_block& log_block, uint32_t partition_index) + { + assert(partition_index < log_block.m_num_partitions); + + uint32_t ofs = 0; + + for (uint32_t i = 0; i != partition_index; ++i) + ofs += get_num_cem_values(log_block.m_color_endpoint_modes[i]); + + assert(ofs < MAX_ENDPOINTS); + + return log_block.m_endpoints + ofs; + } + + const char* get_cem_name(uint32_t cem_index) + { + static const char *s_cem_names[16] = + { + "CEM_LDR_LUM_DIRECT (0)", + "CEM_LDR_LUM_BASE_PLUS_OFS (1)", + "CEM_HDR_LUM_LARGE_RANGE (2)", + "CEM_HDR_LUM_SMALL_RANGE (3)", + "CEM_LDR_LUM_ALPHA_DIRECT (4)", + "CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS (5)", + "CEM_LDR_RGB_BASE_SCALE (6)", + "CEM_HDR_RGB_BASE_SCALE (7)", + "CEM_LDR_RGB_DIRECT (8)", + "CEM_LDR_RGB_BASE_PLUS_OFFSET (9)", + "CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A (10)", + "CEM_HDR_RGB (11)", + "CEM_LDR_RGBA_DIRECT (12)", + "CEM_LDR_RGBA_BASE_PLUS_OFFSET (13)", + "CEM_HDR_RGB_LDR_ALPHA (14)", + "CEM_HDR_RGB_HDR_ALPHA (15)" + }; + + assert(cem_index < std::size(s_cem_names)); + const char *p = s_cem_names[cem_index]; + assert(p); + return p; + } + + bool cem_is_ldr_direct(uint32_t cem_index) + { + return (cem_index == CEM_LDR_RGB_DIRECT) || (cem_index == CEM_LDR_RGBA_DIRECT); + } + + bool cem_is_ldr_base_scale(uint32_t cem_index) + { + return (cem_index == CEM_LDR_RGB_BASE_SCALE) || (cem_index == CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A); + } + + bool cem_is_ldr_base_plus_ofs(uint32_t cem_index) + { + return (cem_index == CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == CEM_LDR_RGBA_BASE_PLUS_OFFSET); + } + + bool cem_supports_bc(uint32_t cem) + { + switch (cem) + { 
+ case CEM_LDR_RGB_DIRECT: + case CEM_LDR_RGBA_DIRECT: + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + return true; + default: + break; + } + return false; + } + + // input: + // a=[0,255] + // b=[0,255] + // output: + // a=from, converted to -32 to 31 + // b=to, shifted right by 1 and 1 bit added to MSB, so [0,255] + void bit_transfer_signed_dec(int& a, int& b) + { + assert((a >= 0) && (a <= 255)); + assert((b >= 0) && (b <= 255)); + + b >>= 1; + b |= (a & 0x80); + + a >>= 1; + a &= 0x3F; + if ((a & 0x20) != 0) + a -= 0x40; + } + + // transfers a bit from b to a, prepares a for encoding + // input: + // a=[-32,31] (6-bits, 2's complement) + // b=[0,255] (8-bits) + // output: + // a=[0,255] (preserve top 2 bits) + // b=[0,255] + void bit_transfer_signed_enc(int& a, int& b) + { + assert((a >= -32) && (a <= 31)); + assert((b >= 0) && (b <= 255)); + + // extract MSB of b + bool bit_to_transfer = (b & 0x80) != 0; + b = (b << 1) & 0xFF; // 7 bits to 8 + + a &= 0x3F; // 6 bits + a <<= 1; // 6 to 7 bits + if (bit_to_transfer) + a |= 0x80; // set MSB + } + + // RGB or RGBA direct + bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index) + { + assert((cem_index == CEM_LDR_RGB_DIRECT) || (cem_index == CEM_LDR_RGBA_DIRECT)); + (void)(cem_index); + + const auto& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val; + + uint8_t dequantized_endpoints[6]; + for (uint32_t i = 0; i < 6; i++) + dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]]; + + uint32_t s0 = dequantized_endpoints[0] + dequantized_endpoints[2] + dequantized_endpoints[4]; + uint32_t s1 = dequantized_endpoints[1] + dequantized_endpoints[3] + dequantized_endpoints[5]; + + return s1 < s0; + } + + // RGB or RGBA base plus offset + bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index) + { + assert((cem_index == 
CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == CEM_LDR_RGBA_BASE_PLUS_OFFSET)); + (void)(cem_index); + + const auto& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val; + + int dequantized_endpoints[6]; + for (uint32_t i = 0; i < 6; i++) + dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]]; + + bit_transfer_signed_dec(dequantized_endpoints[1], dequantized_endpoints[0]); + bit_transfer_signed_dec(dequantized_endpoints[3], dequantized_endpoints[2]); + bit_transfer_signed_dec(dequantized_endpoints[5], dequantized_endpoints[4]); + + int s = dequantized_endpoints[1] + dequantized_endpoints[3] + dequantized_endpoints[5]; + + return s < 0; + } + + bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index) + { + assert(is_cem_ldr(cem_index)); + + bool used_blue_contraction_flag = false; + + if ((cem_index == 8) || (cem_index == 12)) + used_blue_contraction_flag = cem8_or_12_used_blue_contraction(cem_index, pEndpoint_vals, endpoint_ise_index); + else if ((cem_index == 9) || (cem_index == 13)) + used_blue_contraction_flag = cem9_or_13_used_blue_contraction(cem_index, pEndpoint_vals, endpoint_ise_index); + + return used_blue_contraction_flag; + } + + uint32_t get_base_cem_without_alpha(uint32_t cem) + { + assert(is_cem_ldr(cem)); + + switch (cem) + { + case CEM_LDR_LUM_ALPHA_DIRECT: return CEM_LDR_LUM_DIRECT; + case CEM_LDR_RGBA_DIRECT: return CEM_LDR_RGB_DIRECT; + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: return CEM_LDR_RGB_BASE_SCALE; + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: return CEM_LDR_RGB_BASE_PLUS_OFFSET; + default: + break; + } + + return cem; + } + + int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta) + { + if (delta == 0) + return ise_val; + + uint32_t num_ise_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + + const auto& ISE_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_rank; + 
const auto& rank_to_ISE = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_rank_to_ISE; + + int cur_rank = ISE_to_rank[ise_val]; + int new_rank = basisu::clamp(cur_rank + delta, 0, (int)num_ise_levels - 1); + + return rank_to_ISE[new_rank]; + } + + void get_astc_block_size_by_index(uint32_t index, uint32_t& width, uint32_t& height) + { + assert(index < NUM_ASTC_BLOCK_SIZES); + + width = g_astc_block_sizes[index][0]; + height = g_astc_block_sizes[index][1]; + } + + int find_astc_block_size_index(uint32_t width, uint32_t height) + { + for (uint32_t i = 0; i < NUM_ASTC_BLOCK_SIZES; i++) + if ((width == g_astc_block_sizes[i][0]) && (height == g_astc_block_sizes[i][1])) + return i; + + return -1; + } + +} // namespace astc_helpers + +#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_containers.h b/Source/ThirdParty/basis_universal/transcoder/basisu_containers.h new file mode 100644 index 000000000..662cf52b7 --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_containers.h @@ -0,0 +1,4343 @@ +// basisu_containers.h +#pragma once +#include +#include +#include +#include +#include + +#if defined(__linux__) && !defined(ANDROID) +// Only for malloc_usable_size() in basisu_containers_impl.h +#include +#define HAS_MALLOC_USABLE_SIZE 1 +#endif + +// Set to 1 to always check vector operator[], front(), and back() even in release. +#define BASISU_VECTOR_FORCE_CHECKING 0 + +// If 1, the vector container will not query the CRT to get the size of resized memory blocks. +#define BASISU_VECTOR_DETERMINISTIC 1 + +#ifdef _MSC_VER +#define BASISU_FORCE_INLINE __forceinline +#else +#define BASISU_FORCE_INLINE inline +#endif + +#define BASISU_HASHMAP_TEST 0 + +namespace basisu +{ + enum { cInvalidIndex = -1 }; + + template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } + + template inline S maximum(S a, S b) { return (a > b) ? 
a : b; } + template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } + + template inline S minimum(S a, S b) { return (a < b) ? a : b; } + template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } + template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } + +#ifdef _MSC_VER + __declspec(noreturn) +#else + [[noreturn]] +#endif + void container_abort(const char* pMsg, ...); + + namespace helpers + { + inline bool is_power_of_2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_power_of_2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } + + template const T& minimum(const T& a, const T& b) { return (b < a) ? b : a; } + template const T& maximum(const T& a, const T& b) { return (a < b) ? b : a; } + + inline uint32_t floor_log2i(uint32_t v) + { + uint32_t l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint32_t floor_log2i(uint64_t v) + { + uint32_t l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint32_t next_pow2(uint32_t val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint64_t next_pow2(uint64_t val) + { + val--; + val |= val >> 32; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + } // namespace helpers + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) T(init); + } + + template + inline void construct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + new (static_cast(p)) T; + } + + template + inline void construct_array(T* p, size_t n, const U& init) + { + T* q = p + n; + for (; p != q; ++p) + new 
(static_cast(p)) T(init); + } + + template + inline void destruct(T* p) + { + p->~T(); + } + + template inline void destruct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + p->~T(); + } + + template + struct scalar_type + { + enum { cFlag = false }; + static inline void construct(T* p) { basisu::construct(p); } + static inline void construct(T* p, const T& init) { basisu::construct(p, init); } + static inline void construct_array(T* p, size_t n) { basisu::construct_array(p, n); } + static inline void destruct(T* p) { basisu::destruct(p); } + static inline void destruct_array(T* p, size_t n) { basisu::destruct_array(p, n); } + }; + + template struct scalar_type + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, size_t n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { (void)p; } + static inline void destruct_array(T** p, size_t n) { (void)p, (void)n; } + }; + +#define BASISU_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, size_t n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { (void)p; } \ + static inline void destruct_array(X* p, size_t n) { (void)p, (void)n; } }; + + BASISU_DEFINE_BUILT_IN_TYPE(bool) + BASISU_DEFINE_BUILT_IN_TYPE(char) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned char) + BASISU_DEFINE_BUILT_IN_TYPE(short) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned short) + BASISU_DEFINE_BUILT_IN_TYPE(int) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned int) + BASISU_DEFINE_BUILT_IN_TYPE(long) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned long) +#ifdef __GNUC__ + BASISU_DEFINE_BUILT_IN_TYPE(long long) + 
BASISU_DEFINE_BUILT_IN_TYPE(unsigned long long) +#else + BASISU_DEFINE_BUILT_IN_TYPE(__int64) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned __int64) +#endif + BASISU_DEFINE_BUILT_IN_TYPE(float) + BASISU_DEFINE_BUILT_IN_TYPE(double) + BASISU_DEFINE_BUILT_IN_TYPE(long double) + +#undef BASISU_DEFINE_BUILT_IN_TYPE + + template + struct bitwise_movable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; + + template + struct bitwise_copyable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_COPYABLE(Q) template<> struct bitwise_copyable { enum { cFlag = true }; }; + +#define BASISU_IS_POD(T) __is_pod(T) + +#define BASISU_IS_SCALAR_TYPE(T) (scalar_type::cFlag) + +#if !defined(BASISU_HAVE_STD_TRIVIALLY_COPYABLE) && defined(__GNUC__) && (__GNUC__ < 5) +#define BASISU_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__) +#else +#define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value +#endif + + // TODO: clean this up, it's still confusing (copying vs. movable). 
+#define BASISU_IS_BITWISE_COPYABLE(T) (BASISU_IS_SCALAR_TYPE(T) || BASISU_IS_POD(T) || BASISU_IS_TRIVIALLY_COPYABLE(T) || std::is_trivial::value || (bitwise_copyable::cFlag)) + +#define BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) (BASISU_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) + +#define BASISU_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T)) && (!std::is_trivially_destructible::value)) + + typedef char(&yes_t)[1]; + typedef char(&no_t)[2]; + + template yes_t class_test(int U::*); + template no_t class_test(...); + + template struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; + + template struct is_pointer + { + enum { value = false }; + }; + + template struct is_pointer + { + enum { value = true }; + }; + + struct empty_type { }; + + BASISU_DEFINE_BITWISE_COPYABLE(empty_type); + BASISU_DEFINE_BITWISE_MOVABLE(empty_type); + + template struct rel_ops + { + friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend bool operator> (const T& x, const T& y) { return (y < x); } + friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } + }; + + struct elemental_vector + { + void* m_p; + size_t m_size; + size_t m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, size_t num); + + bool increase_capacity(size_t min_new_capacity, bool grow_hint, size_t element_size, object_mover pRelocate, bool nofail); + }; + + // Returns true if a+b would overflow a size_t. + inline bool add_overflow_check(size_t a, size_t b) + { + size_t c = a + b; + return c < a; + } + + // Returns false on overflow, true if OK. + template + inline bool can_fit_into_size_t(T val) + { + static_assert(std::is_integral::value, "T must be an integral type"); + + return (val >= 0) && (static_cast(val) == val); + } + + // Returns true if a*b would overflow a size_t. 
+ inline bool mul_overflow_check(size_t a, size_t b) + { + // Avoid the division on 32-bit platforms + if (sizeof(size_t) == sizeof(uint32_t)) + return !can_fit_into_size_t(static_cast(a) * b); + else + return b && (a > (SIZE_MAX / b)); + } + + template + class writable_span; + + template + class readable_span + { + public: + using value_type = T; + using size_type = size_t; + using const_pointer = const T*; + using const_reference = const T&; + using const_iterator = const T*; + + inline readable_span() : + m_p(nullptr), + m_size(0) + { + } + + inline readable_span(const writable_span& other); + inline readable_span& operator= (const writable_span& rhs); + + inline readable_span(const_pointer p, size_t n) + { + set(p, n); + } + + inline readable_span(const_pointer s, const_pointer e) + { + set(s, e); + } + + inline readable_span(const readable_span& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + } + + inline readable_span(readable_span&& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + + other.m_p = nullptr; + other.m_size = 0; + } + + template + inline readable_span(const T(&arr)[N]) : + m_p(arr), + m_size(N) + { + } + + template + inline readable_span& set(const T(&arr)[N]) + { + m_p = arr; + m_size = N; + return *this; + } + + inline readable_span& set(const_pointer p, size_t n) + { + if (!p && n) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = p; + m_size = n; + } + + return *this; + } + + inline readable_span& set(const_pointer s, const_pointer e) + { + if ((e < s) || (!s && e)) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = s; + m_size = e - s; + } + + return *this; + } + + inline bool operator== (const readable_span& rhs) const + { + return (m_p == rhs.m_p) && (m_size == rhs.m_size); + } + + inline bool operator!= (const readable_span& rhs) const + { + return (m_p != rhs.m_p) || (m_size != rhs.m_size); + } + + // only true if the region is 
totally inside the span + inline bool is_inside_ptr(const_pointer p, size_t n) const + { + if (!is_valid()) + { + assert(0); + return false; + } + + if (!p) + { + assert(!n); + return false; + } + + return (p >= m_p) && ((p + n) <= end()); + } + + inline bool is_inside(size_t ofs, size_t size) const + { + if (add_overflow_check(ofs, size)) + { + assert(0); + return false; + } + + if (!is_valid()) + { + assert(0); + return false; + } + + if ((ofs + size) > m_size) + return false; + + return true; + } + + inline readable_span subspan(size_t ofs, size_t n) const + { + if (!is_valid()) + { + assert(0); + return readable_span((const_pointer)nullptr, (size_t)0); + } + + if (add_overflow_check(ofs, n)) + { + assert(0); + return readable_span((const_pointer)nullptr, (size_t)0); + } + + if ((ofs + n) > m_size) + { + assert(0); + return readable_span((const_pointer)nullptr, (size_t)0); + } + + return readable_span(m_p + ofs, n); + } + + void clear() + { + m_p = nullptr; + m_size = 0; + } + + inline bool empty() const { return !m_size; } + + // true if the span is non-nullptr and is not empty + inline bool is_valid() const { return m_p && m_size; } + + inline bool is_nullptr() const { return m_p == nullptr; } + + inline size_t size() const { return m_size; } + inline size_t size_in_bytes() const { assert(can_fit_into_size_t((uint64_t)m_size * sizeof(T))); return m_size * sizeof(T); } + + inline const_pointer get_ptr() const { return m_p; } + + inline const_iterator begin() const { return m_p; } + inline const_iterator end() const { assert(m_p || !m_size); return m_p + m_size; } + + inline const_iterator cbegin() const { return m_p; } + inline const_iterator cend() const { assert(m_p || !m_size); return m_p + m_size; } + + inline const_reference front() const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return m_p[0]; + } + + inline const_reference back() const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return 
m_p[m_size - 1]; + } + + inline readable_span& operator= (const readable_span& rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + return *this; + } + + inline readable_span& operator= (readable_span&& rhs) + { + if (this != &rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + rhs.m_p = nullptr; + rhs.m_size = 0; + } + + return *this; + } + + inline const_reference operator* () const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return *m_p; + } + + inline const_pointer operator-> () const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return m_p; + } + + inline readable_span& remove_prefix(size_t n) + { + if ((!m_p) || (n > m_size)) + { + assert(0); + return *this; + } + + m_p += n; + m_size -= n; + return *this; + } + + inline readable_span& remove_suffix(size_t n) + { + if ((!m_p) || (n > m_size)) + { + assert(0); + return *this; + } + + m_size -= n; + return *this; + } + + inline readable_span& enlarge(size_t n) + { + if (!m_p) + { + assert(0); + return *this; + } + + if (add_overflow_check(m_size, n)) + { + assert(0); + return *this; + } + + m_size += n; + return *this; + } + + bool copy_from(size_t src_ofs, size_t src_size, T* pDst, size_t dst_ofs) const + { + if (!src_size) + return true; + + if (!pDst) + { + assert(0); + return false; + } + + if (!is_inside(src_ofs, src_size)) + { + assert(0); + return false; + } + + const_pointer pS = m_p + src_ofs; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + const uint64_t num_bytes = (uint64_t)src_size * sizeof(T); + + if (!can_fit_into_size_t(num_bytes)) + { + assert(0); + return false; + } + + memcpy(pDst, pS, (size_t)num_bytes); + } + else + { + T* pD = pDst + dst_ofs; + T* pDst_end = pD + src_size; + + while (pD != pDst_end) + *pD++ = *pS++; + } + + return true; + } + + inline const_reference operator[] (size_t idx) const + { + if ((!is_valid()) || (idx >= m_size)) + container_abort("readable_span: invalid span or index\n"); + + return m_p[idx]; + } + + 
inline uint16_t read_le16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return false; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + template + inline R read_val(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(R))) + { + assert(0); + return (R)0; + } + + return *reinterpret_cast(&m_p[ofs]); + } + + inline uint16_t read_be16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return 0; + } + + const uint8_t b = (uint8_t)m_p[ofs]; + const uint8_t a = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + inline uint32_t read_le32(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + const uint8_t c = (uint8_t)m_p[ofs + 2]; + const uint8_t d = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint32_t read_be32(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t d = (uint8_t)m_p[ofs]; + const uint8_t c = (uint8_t)m_p[ofs + 1]; + const uint8_t b = (uint8_t)m_p[ofs + 2]; + const uint8_t a = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint64_t read_le64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return 0; + } + const uint64_t l = read_le32(ofs); + const uint64_t h = read_le32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + inline uint64_t read_be64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { 
+ assert(0); + return 0; + } + const uint64_t h = read_be32(ofs); + const uint64_t l = read_be32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + private: + const_pointer m_p; + size_t m_size; + }; + + template + class writable_span + { + friend readable_span; + + public: + using value_type = T; + using size_type = size_t; + using const_pointer = const T*; + using const_reference = const T&; + using const_iterator = const T*; + using pointer = T*; + using reference = T&; + using iterator = T*; + + inline writable_span() : + m_p(nullptr), + m_size(0) + { + } + + inline writable_span(T* p, size_t n) + { + set(p, n); + } + + inline writable_span(T* s, T* e) + { + set(s, e); + } + + inline writable_span(const writable_span& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + } + + inline writable_span(writable_span&& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + + other.m_p = nullptr; + other.m_size = 0; + } + + template + inline writable_span(T(&arr)[N]) : + m_p(arr), + m_size(N) + { + } + + readable_span get_readable_span() const + { + return readable_span(m_p, m_size); + } + + template + inline writable_span& set(T(&arr)[N]) + { + m_p = arr; + m_size = N; + return *this; + } + + inline writable_span& set(T* p, size_t n) + { + if (!p && n) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = p; + m_size = n; + } + + return *this; + } + + inline writable_span& set(T* s, T* e) + { + if ((e < s) || (!s && e)) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = s; + m_size = e - s; + } + + return *this; + } + + inline bool operator== (const writable_span& rhs) const + { + return (m_p == rhs.m_p) && (m_size == rhs.m_size); + } + + inline bool operator== (const readable_span& rhs) const + { + return (m_p == rhs.m_p) && (m_size == rhs.m_size); + } + + inline bool operator!= (const writable_span& rhs) const + { + return (m_p != rhs.m_p) || (m_size != rhs.m_size); + 
} + + inline bool operator!= (const readable_span& rhs) const + { + return (m_p != rhs.m_p) || (m_size != rhs.m_size); + } + + // only true if the region is totally inside the span + inline bool is_inside_ptr(const_pointer p, size_t n) const + { + if (!is_valid()) + { + assert(0); + return false; + } + + if (!p) + { + assert(!n); + return false; + } + + return (p >= m_p) && ((p + n) <= end()); + } + + inline bool is_inside(size_t ofs, size_t size) const + { + if (add_overflow_check(ofs, size)) + { + assert(0); + return false; + } + + if (!is_valid()) + { + assert(0); + return false; + } + + if ((ofs + size) > m_size) + return false; + + return true; + } + + inline writable_span subspan(size_t ofs, size_t n) const + { + if (!is_valid()) + { + assert(0); + return writable_span((T*)nullptr, (size_t)0); + } + + if (add_overflow_check(ofs, n)) + { + assert(0); + return writable_span((T*)nullptr, (size_t)0); + } + + if ((ofs + n) > m_size) + { + assert(0); + return writable_span((T*)nullptr, (size_t)0); + } + + return writable_span(m_p + ofs, n); + } + + void clear() + { + m_p = nullptr; + m_size = 0; + } + + inline bool empty() const { return !m_size; } + + // true if the span is non-nullptr and is not empty + inline bool is_valid() const { return m_p && m_size; } + + inline bool is_nullptr() const { return m_p == nullptr; } + + inline size_t size() const { return m_size; } + inline size_t size_in_bytes() const { assert(can_fit_into_size_t((uint64_t)m_size * sizeof(T))); return m_size * sizeof(T); } + + inline T* get_ptr() const { return m_p; } + + inline iterator begin() const { return m_p; } + inline iterator end() const { assert(m_p || !m_size); return m_p + m_size; } + + inline const_iterator cbegin() const { return m_p; } + inline const_iterator cend() const { assert(m_p || !m_size); return m_p + m_size; } + + inline T& front() const + { + if (!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return m_p[0]; + } + + inline T& back() const + { + if 
(!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return m_p[m_size - 1]; + } + + inline writable_span& operator= (const writable_span& rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + return *this; + } + + inline writable_span& operator= (writable_span&& rhs) + { + if (this != &rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + rhs.m_p = nullptr; + rhs.m_size = 0; + } + + return *this; + } + + inline T& operator* () const + { + if (!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return *m_p; + } + + inline T* operator-> () const + { + if (!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return m_p; + } + + inline bool set_all(size_t ofs, size_t size, const_reference val) + { + if (!size) + return true; + + if (!is_inside(ofs, size)) + { + assert(0); + return false; + } + + T* pDst = m_p + ofs; + + if ((sizeof(T) == sizeof(uint8_t)) && (BASISU_IS_BITWISE_COPYABLE(T))) + { + memset(pDst, (int)((uint8_t)val), size); + } + else + { + + T* pDst_end = pDst + size; + + while (pDst != pDst_end) + *pDst++ = val; + } + + return true; + } + + inline bool set_all(const_reference val) + { + return set_all(0, m_size, val); + } + + inline writable_span& remove_prefix(size_t n) + { + if ((!m_p) || (n > m_size)) + { + assert(0); + return *this; + } + + m_p += n; + m_size -= n; + return *this; + } + + inline writable_span& remove_suffix(size_t n) + { + if ((!m_p) || (n > m_size)) + { + assert(0); + return *this; + } + + m_size -= n; + return *this; + } + + inline writable_span& enlarge(size_t n) + { + if (!m_p) + { + assert(0); + return *this; + } + + if (add_overflow_check(m_size, n)) + { + assert(0); + return *this; + } + + m_size += n; + return *this; + } + + // copy from this span to the destination ptr + bool copy_from(size_t src_ofs, size_t src_size, T* pDst, size_t dst_ofs) const + { + if (!src_size) + return true; + + if (!pDst) + { + assert(0); + return false; + } + + if (!is_inside(src_ofs, src_size)) + { + 
assert(0); + return false; + } + + const_pointer pS = m_p + src_ofs; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + const uint64_t num_bytes = (uint64_t)src_size * sizeof(T); + + if (!can_fit_into_size_t(num_bytes)) + { + assert(0); + return false; + } + + memcpy(pDst, pS, (size_t)num_bytes); + } + else + { + T* pD = pDst + dst_ofs; + T* pDst_end = pD + src_size; + + while (pD != pDst_end) + *pD++ = *pS++; + } + + return true; + } + + // copy from the source ptr into this span + bool copy_into(const_pointer pSrc, size_t src_ofs, size_t src_size, size_t dst_ofs) const + { + if (!src_size) + return true; + + if (!pSrc) + { + assert(0); + return false; + } + + if (add_overflow_check(src_ofs, src_size) || add_overflow_check(dst_ofs, src_size)) + { + assert(0); + return false; + } + + if (!is_valid()) + { + assert(0); + return false; + } + + if (!is_inside(dst_ofs, src_size)) + { + assert(0); + return false; + } + + const_pointer pS = pSrc + src_ofs; + T* pD = m_p + dst_ofs; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + const uint64_t num_bytes = (uint64_t)src_size * sizeof(T); + + if (!can_fit_into_size_t(num_bytes)) + { + assert(0); + return false; + } + + memcpy(pD, pS, (size_t)num_bytes); + } + else + { + T* pDst_end = pD + src_size; + + while (pD != pDst_end) + *pD++ = *pS++; + } + + return true; + } + + // copy from a source span into this span + bool copy_into(const readable_span& src, size_t src_ofs, size_t src_size, size_t dst_ofs) const + { + if (!src.is_inside(src_ofs, src_size)) + { + assert(0); + return false; + } + + return copy_into(src.get_ptr(), src_ofs, src_size, dst_ofs); + } + + // copy from a source span into this span + bool copy_into(const writable_span& src, size_t src_ofs, size_t src_size, size_t dst_ofs) const + { + if (!src.is_inside(src_ofs, src_size)) + { + assert(0); + return false; + } + + return copy_into(src.get_ptr(), src_ofs, src_size, dst_ofs); + } + + inline T& operator[] (size_t idx) const + { + if ((!is_valid()) || (idx >= m_size)) + 
container_abort("writable_span: invalid span or index\n"); + + return m_p[idx]; + } + + template + inline R read_val(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(R))) + { + assert(0); + return (R)0; + } + + return *reinterpret_cast(&m_p[ofs]); + } + + template + inline bool write_val(size_t ofs, R val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(R))) + { + assert(0); + return false; + } + + *reinterpret_cast(&m_p[ofs]) = val; + return true; + } + + inline bool write_le16(size_t ofs, uint16_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return false; + } + + m_p[ofs] = (uint8_t)val; + m_p[ofs + 1] = (uint8_t)(val >> 8u); + return true; + } + + inline bool write_be16(size_t ofs, uint16_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return false; + } + + m_p[ofs + 1] = (uint8_t)val; + m_p[ofs] = (uint8_t)(val >> 8u); + return true; + } + + inline bool write_le32(size_t ofs, uint32_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return false; + } + + m_p[ofs] = (uint8_t)val; + m_p[ofs + 1] = (uint8_t)(val >> 8u); + m_p[ofs + 2] = (uint8_t)(val >> 16u); + m_p[ofs + 3] = (uint8_t)(val >> 24u); + return true; + } + + inline bool write_be32(size_t ofs, uint32_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return false; + } + + m_p[ofs + 3] = (uint8_t)val; + m_p[ofs + 2] = (uint8_t)(val >> 8u); + m_p[ofs + 1] = (uint8_t)(val >> 16u); + m_p[ofs] = (uint8_t)(val >> 24u); + return true; + } + + inline bool write_le64(size_t ofs, uint64_t val) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); 
+ return false; + } + + return write_le32(ofs, (uint32_t)val) && write_le32(ofs + sizeof(uint32_t), (uint32_t)(val >> 32u)); + } + + inline bool write_be64(size_t ofs, uint64_t val) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return false; + } + + return write_be32(ofs + sizeof(uint32_t), (uint32_t)val) && write_be32(ofs, (uint32_t)(val >> 32u)); + } + + inline uint16_t read_le16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return 0; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + inline uint16_t read_be16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return 0; + } + + const uint8_t b = (uint8_t)m_p[ofs]; + const uint8_t a = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + inline uint32_t read_le32(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + const uint8_t c = (uint8_t)m_p[ofs + 2]; + const uint8_t d = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint32_t read_be32(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t d = (uint8_t)m_p[ofs]; + const uint8_t c = (uint8_t)m_p[ofs + 1]; + const uint8_t b = (uint8_t)m_p[ofs + 2]; + const uint8_t a = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint64_t read_le64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return 0; + } + const uint64_t l = read_le32(ofs); + const uint64_t h = 
read_le32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + inline uint64_t read_be64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return 0; + } + const uint64_t h = read_be32(ofs); + const uint64_t l = read_be32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + private: + T* m_p; + size_t m_size; + }; + + template + inline readable_span::readable_span(const writable_span& other) : + m_p(other.m_p), + m_size(other.m_size) + { + } + + template + inline readable_span& readable_span::operator= (const writable_span& rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + return *this; + } + + template + inline bool span_copy(const writable_span& dst, const readable_span& src) + { + return dst.copy_into(src, 0, src.size(), 0); + } + + template + inline bool span_copy(const writable_span& dst, const writable_span& src) + { + return dst.copy_into(src, 0, src.size(), 0); + } + + template + inline bool span_copy(const writable_span& dst, size_t dst_ofs, const writable_span& src, size_t src_ofs, size_t len) + { + return dst.copy_into(src, src_ofs, len, dst_ofs); + } + + template + inline bool span_copy(const writable_span& dst, size_t dst_ofs, const readable_span& src, size_t src_ofs, size_t len) + { + return dst.copy_into(src, src_ofs, len, dst_ofs); + } + + template + class vector : public rel_ops< vector > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + } + + inline vector(size_t n, const T& init) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + increase_capacity(n, false); + construct_array(m_p, n, init); + m_size = n; + } + + inline vector(vector&& other) : + m_p(other.m_p), + m_size(other.m_size), + m_capacity(other.m_capacity) + { + other.m_p = nullptr; + other.m_size = 0; + 
other.m_capacity = 0; + } + + inline vector(const vector& other) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + increase_capacity(other.m_size, false); + + m_size = other.m_size; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + if ((m_p) && (other.m_p)) + { + memcpy(m_p, other.m_p, m_size * sizeof(T)); + } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + } + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (size_t i = m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + } + + inline explicit vector(size_t size) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + resize(size); + } + + inline explicit vector(std::initializer_list init_list) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + resize(init_list.size()); + + size_t idx = 0; + for (const T& elem : init_list) + m_p[idx++] = elem; + + assert(idx == m_size); + } + + inline vector(const readable_span& rs) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + set(rs); + } + + inline vector(const writable_span& ws) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + set(ws); + } + + // mostly to ease porting from std::vector, not particularly optimized + inline void assign(size_t new_size, const T& init) + { + assert(!m_p || (&init < m_p) || (&init >= (m_p + m_size))); + + // Blow away existing contents + resize(0); + + if (new_size) + { + resize(new_size); + + for (size_t i = 0; i < new_size; ++i) + m_p[i] = init; + } + } + + // mostly to ease porting from std::vector, not particularly optimized + template + inline void assign(const R* pBegin, const R* pEnd) + { + assert(!m_p || + (reinterpret_cast(pEnd) <= reinterpret_cast(m_p)) || + (reinterpret_cast(pBegin) >= reinterpret_cast(m_p + m_size)) + ); + + // Blow away existing contents + resize(0); + + if ((!pBegin) || (!pEnd) || (pEnd <= pBegin)) + { + assert(0); + return; 
+ } + + const size_t new_size = static_cast(static_cast(pEnd - pBegin)); + + if (new_size) + { + resize(new_size); + + for (size_t i = 0; i < new_size; ++i) + m_p[i] = static_cast(*pBegin++); + } + } + + // Set contents of vector to contents of the readable span + bool set(const readable_span& rs) + { + if (!rs.is_valid()) + { + assert(0); + return false; + } + + const size_t new_size = rs.size(); + + // Could call resize(), but it'll redundantly construct trivial types. + if (m_size != new_size) + { + if (new_size < m_size) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + } + } + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, false, true)) + return false; + } + } + + // Don't bother constructing trivial types, because we're going to memcpy() over them anyway. + if (!BASISU_IS_BITWISE_COPYABLE(T)) + { + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + if (!rs.copy_from(0, rs.size(), m_p, 0)) + { + assert(0); + return false; + } + + return true; + } + + // Set contents of vector to contents of the writable span + inline bool set(const writable_span& ws) + { + return set(ws.get_readable_span()); + } + + inline ~vector() + { + if (m_p) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p, m_size); + } + + free(m_p); + } + } + + inline vector& operator= (const vector& other) + { + if (this == &other) + return *this; + + if (m_capacity >= other.m_size) + resize(0); + else + { + clear(); + increase_capacity(other.m_size, false); + } + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + if ((m_p) && (other.m_p)) + memcpy((void *)m_p, other.m_p, other.m_size * sizeof(T)); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + } + else + { + T* pDst = m_p; + const 
T* pSrc = other.m_p; + for (size_t i = other.m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return *this; + } + + inline vector& operator= (vector&& rhs) + { + if (this != &rhs) + { + clear(); + + m_p = rhs.m_p; + m_size = rhs.m_size; + m_capacity = rhs.m_capacity; + + rhs.m_p = nullptr; + rhs.m_size = 0; + rhs.m_capacity = 0; + } + return *this; + } + + BASISU_FORCE_INLINE const T* begin() const { return m_p; } + BASISU_FORCE_INLINE T* begin() { return m_p; } + + BASISU_FORCE_INLINE const T* end() const { return m_p + m_size; } + BASISU_FORCE_INLINE T* end() { return m_p + m_size; } + + BASISU_FORCE_INLINE bool empty() const { return !m_size; } + + BASISU_FORCE_INLINE size_t size() const { return m_size; } + BASISU_FORCE_INLINE uint32_t size_u32() const { assert(m_size <= UINT32_MAX); return static_cast(m_size); } + + BASISU_FORCE_INLINE size_t size_in_bytes() const { return m_size * sizeof(T); } + BASISU_FORCE_INLINE uint32_t size_in_bytes_u32() const { assert((m_size * sizeof(T)) <= UINT32_MAX); return static_cast(m_size * sizeof(T)); } + + BASISU_FORCE_INLINE size_t capacity() const { return m_capacity; } + +#if !BASISU_VECTOR_FORCE_CHECKING + BASISU_FORCE_INLINE const T& operator[] (size_t i) const { assert(i < m_size); return m_p[i]; } + BASISU_FORCE_INLINE T& operator[] (size_t i) { assert(i < m_size); return m_p[i]; } +#else + BASISU_FORCE_INLINE const T& operator[] (size_t i) const + { + if (i >= m_size) + container_abort("vector::operator[] invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } + BASISU_FORCE_INLINE T& operator[] (size_t i) + { + if (i >= m_size) + container_abort("vector::operator[] invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } +#endif + + // at() always includes range checking, even in final builds, unlike operator []. 
+ BASISU_FORCE_INLINE const T& at(size_t i) const + { + if (i >= m_size) + container_abort("vector::at() invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } + BASISU_FORCE_INLINE T& at(size_t i) + { + if (i >= m_size) + container_abort("vector::at() invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } + +#if !BASISU_VECTOR_FORCE_CHECKING + BASISU_FORCE_INLINE const T& front() const { assert(m_size); return m_p[0]; } + BASISU_FORCE_INLINE T& front() { assert(m_size); return m_p[0]; } + + BASISU_FORCE_INLINE const T& back() const { assert(m_size); return m_p[m_size - 1]; } + BASISU_FORCE_INLINE T& back() { assert(m_size); return m_p[m_size - 1]; } +#else + BASISU_FORCE_INLINE const T& front() const + { + if (!m_size) + container_abort("front: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[0]; + } + BASISU_FORCE_INLINE T& front() + { + if (!m_size) + container_abort("front: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[0]; + } + + BASISU_FORCE_INLINE const T& back() const + { + if (!m_size) + container_abort("back: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[m_size - 1]; + } + BASISU_FORCE_INLINE T& back() + { + if (!m_size) + container_abort("back: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[m_size - 1]; + } +#endif + + BASISU_FORCE_INLINE const T* get_ptr() const { return m_p; } + BASISU_FORCE_INLINE T* get_ptr() { return m_p; } + + BASISU_FORCE_INLINE const T* data() const { return m_p; } + BASISU_FORCE_INLINE T* data() { return m_p; } + + // clear() sets the container to empty, then frees the allocated block. 
+ inline void clear() + { + if (m_p) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p, m_size); + } + + free(m_p); + + m_p = nullptr; + m_size = 0; + m_capacity = 0; + } + } + + inline void clear_no_destruction() + { + if (m_p) + { + free(m_p); + m_p = nullptr; + m_size = 0; + m_capacity = 0; + } + } + + inline void reserve(size_t new_capacity) + { + if (!try_reserve(new_capacity)) + container_abort("vector:reserve: try_reserve failed!\n"); + } + + inline bool try_reserve(size_t new_capacity) + { + if (new_capacity > m_capacity) + { + if (!increase_capacity(new_capacity, false, true)) + return false; + } + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. + // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. + vector tmp; + if (!tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false, true)) + return false; + + tmp = *this; + swap(tmp); + } + + return true; + } + + // try_resize(0) sets the container to empty, but does not free the allocated block. + inline bool try_resize(size_t new_size, bool grow_hint = false) + { + if (m_size != new_size) + { + if (new_size < m_size) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + } + } + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + // resize(0) sets the container to empty, but does not free the allocated block. 
+ inline void resize(size_t new_size, bool grow_hint = false) + { + if (!try_resize(new_size, grow_hint)) + container_abort("vector::resize failed, new size %zu\n", new_size); + } + + // If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future). + // Otherwise it blows away the allocated block. See http://www.codercorner.com/blog/?p=494 + inline void reset() + { + if (m_size >= (m_capacity >> 1)) + resize(0); + else + clear(); + } + + inline T* try_enlarge(size_t i) + { + size_t cur_size = m_size; + + if (add_overflow_check(cur_size, i)) + return nullptr; + + if (!try_resize(cur_size + i, true)) + return nullptr; + + return get_ptr() + cur_size; + } + + inline T* enlarge(size_t i) + { + T* p = try_enlarge(i); + if (!p) + container_abort("vector::enlarge failed, amount %zu!\n", i); + return p; + } + + BASISU_FORCE_INLINE void push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::push_back: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + BASISU_FORCE_INLINE void push_back_value(T&& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::push_back_value: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + new ((void*)(m_p + m_size)) T(std::move(obj)); + m_size++; + } + + inline bool try_push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline bool 
try_push_back(T&& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + new ((void*)(m_p + m_size)) T(std::move(obj)); + m_size++; + + return true; + } + + // obj is explictly passed in by value, not ref + inline void push_back_value(T obj) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::push_back_value: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + // obj is explictly passed in by value, not ref + inline bool try_push_back_value(T obj) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + template + BASISU_FORCE_INLINE void emplace_back(Args&&... args) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::enlarge: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + new ((void*)(m_p + m_size)) T(std::forward(args)...); // perfect forwarding + m_size++; + } + + template + BASISU_FORCE_INLINE bool try_emplace_back(Args&&... 
args) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + new ((void*)(m_p + m_size)) T(std::forward(args)...); // perfect forwarding + m_size++; + + return true; + } + + inline void pop_back() + { + assert(m_size); + + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline bool try_insert(size_t index, const T* p, size_t n) + { + assert(index <= m_size); + + if (index > m_size) + return false; + + if (!n) + return true; + + const size_t orig_size = m_size; + + if (add_overflow_check(m_size, n)) + return false; + + if (!try_resize(m_size + n, true)) + return false; + + const size_t num_to_move = orig_size - index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This overwrites the destination object bits, but bitwise copyable means we don't need to worry about destruction. + memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); + } + else + { + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + for (size_t i = 0; i < num_to_move; i++) + { + assert((uint64_t)(pDst - m_p) < (uint64_t)m_size); + + *pDst = std::move(*pSrc); + pDst--; + pSrc--; + } + } + + T* pDst = m_p + index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This copies in the new bits, overwriting the existing objects, which is OK for copyable types that don't need destruction. 
+ memcpy(pDst, p, sizeof(T) * n); + } + else + { + for (size_t i = 0; i < n; i++) + { + assert((uint64_t)(pDst - m_p) < (uint64_t)m_size); + *pDst++ = *p++; + } + } + + return true; + } + + inline void insert(size_t index, const T* p, size_t n) + { + if (!try_insert(index, p, n)) + container_abort("vector::insert() failed!\n"); + } + + inline bool try_insert(T* p, const T& obj) + { + if (p < begin()) + { + assert(0); + return false; + } + + uint64_t ofs = p - begin(); + + if (ofs > m_size) + { + assert(0); + return false; + } + + if ((size_t)ofs != ofs) + { + assert(0); + return false; + } + + return try_insert((size_t)ofs, &obj, 1); + } + + inline void insert(T* p, const T& obj) + { + if (!try_insert(p, obj)) + container_abort("vector::insert() failed!\n"); + } + + // push_front() isn't going to be very fast - it's only here for usability. + inline void push_front(const T& obj) + { + insert(0, &obj, 1); + } + + inline bool try_push_front(const T& obj) + { + return try_insert(0, &obj, 1); + } + + vector& append(const vector& other) + { + if (other.m_size) + insert(m_size, &other[0], other.m_size); + return *this; + } + + bool try_append(const vector& other) + { + if (other.m_size) + return try_insert(m_size, &other[0], other.m_size); + + return true; + } + + vector& append(const T* p, size_t n) + { + if (n) + insert(m_size, p, n); + return *this; + } + + bool try_append(const T* p, size_t n) + { + if (n) + return try_insert(m_size, p, n); + + return true; + } + + inline bool erase(size_t start, size_t n) + { + if (add_overflow_check(start, n)) + { + assert(0); + return false; + } + + assert((start + n) <= m_size); + + if ((start + n) > m_size) + { + assert(0); + return false; + } + + if (!n) + return true; + + const size_t num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + + const T* pSrc = m_p + start + n; + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) + { + // This test is overly cautious. 
+ if ((!BASISU_IS_BITWISE_COPYABLE(T)) || (BASISU_HAS_DESTRUCTOR(T))) + { + // Type has been marked explictly as bitwise movable, which means we can move them around but they may need to be destructed. + // First destroy the erased objects. + scalar_type::destruct_array(pDst, n); + } + + // Copy "down" the objects to preserve, filling in the empty slots. + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + + memmove((void *)pDst, pSrc, num_to_move * sizeof(T)); + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + } + else + { + // Type is not bitwise copyable or movable. + // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. + T* pDst_end = pDst + num_to_move; + + while (pDst != pDst_end) + { + *pDst = std::move(*pSrc); + + ++pDst; + ++pSrc; + } + + scalar_type::destruct_array(pDst_end, n); + } + + m_size -= n; + + return true; + } + + inline bool erase_index(size_t index) + { + return erase(index, 1); + } + + inline bool erase(T* p) + { + assert((p >= m_p) && (p < (m_p + m_size))); + + if (p < m_p) + return false; + + return erase_index(static_cast(p - m_p)); + } + + inline bool erase(T* pFirst, T* pEnd) + { + assert(pFirst <= pEnd); + assert(pFirst >= begin() && pFirst <= end()); + assert(pEnd >= begin() && pEnd <= end()); + + if ((pFirst < begin()) || (pEnd < pFirst)) + { + assert(0); + return false; + } + + uint64_t ofs = pFirst - begin(); + if ((size_t)ofs != ofs) + { + assert(0); + return false; + } + + uint64_t n = pEnd - pFirst; + if ((size_t)n != n) + { + assert(0); + return false; + } + + return erase((size_t)ofs, (size_t)n); + } + + bool erase_unordered(size_t index) + { + if (index >= m_size) + { + assert(0); + return false; + } + + if ((index + 1) < m_size) + { + (*this)[index] = std::move(back()); + } + + pop_back(); + return true; + } + + inline bool operator== (const 
vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (size_t i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const size_t min_size = helpers::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + inline void swap(vector& other) + { + std::swap(m_p, other.m_p); + std::swap(m_size, other.m_size); + std::swap(m_capacity, other.m_capacity); + } + + inline void sort() + { + std::sort(begin(), end()); + } + + inline void unique() + { + if (!empty()) + { + sort(); + + resize(std::unique(begin(), end()) - begin()); + } + } + + inline void reverse() + { + const size_t j = m_size >> 1; + + for (size_t i = 0; i < j; i++) + std::swap(m_p[i], m_p[m_size - 1 - i]); + } + + inline bool find(const T& key, size_t &idx) const + { + idx = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + size_t index = 0; + + while (p != p_end) + { + if (key == *p) + { + idx = index; + return true; + } + + p++; + index++; + } + + return false; + } + + inline bool find_sorted(const T& key, size_t& idx) const + { + idx = 0; + + if (!m_size) + return false; + + // Inclusive range + size_t low = 0, high = m_size - 1; + + while (low <= high) + { + size_t mid = (size_t)(((uint64_t)low + (uint64_t)high) >> 1); + + const T* pTrial_key = m_p + mid; + + // Sanity check comparison operator + assert(!((*pTrial_key < key) && (key < *pTrial_key))); + + if (*pTrial_key < key) + { + if (add_overflow_check(mid, 1)) + break; + + low = mid + 1; + } + else if (key < *pTrial_key) + { + if (!mid) + 
break; + + high = mid - 1; + } + else + { + idx = mid; + return true; + } + } + + return false; + } + + inline size_t count_occurences(const T& key) const + { + size_t c = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + while (p != p_end) + { + if (key == *p) + c++; + + p++; + } + + return c; + } + + inline void set_all(const T& o) + { + if ((sizeof(T) == 1) && (scalar_type::cFlag)) + { +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + memset(m_p, *reinterpret_cast(&o), m_size); + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + } + else + { + T* pDst = m_p; + T* pDst_end = pDst + m_size; + while (pDst != pDst_end) + *pDst++ = o; + } + } + + // Caller assumes ownership of the heap block associated with the container. Container is cleared. + // Caller must use free() on the returned pointer. + inline void* assume_ownership() + { + T* p = m_p; + m_p = nullptr; + m_size = 0; + m_capacity = 0; + return p; + } + + // Caller is granting ownership of the indicated heap block. + // Block must have size constructed elements, and have enough room for capacity elements. + // The block must have been allocated using malloc(). + // Important: This method is used in Basis Universal. If you change how this container allocates memory, you'll need to change any users of this method. + inline bool grant_ownership(T* p, size_t size, size_t capacity) + { + // To prevent the caller from obviously shooting themselves in the foot. + if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) + { + // Can grant ownership of a block inside the container itself! 
+ assert(0); + return false; + } + + if (size > capacity) + { + assert(0); + return false; + } + + if (!p) + { + if (capacity) + { + assert(0); + return false; + } + } + else if (!capacity) + { + assert(0); + return false; + } + + clear(); + m_p = p; + m_size = size; + m_capacity = capacity; + return true; + } + + readable_span get_readable_span() const + { + return readable_span(m_p, m_size); + } + + writable_span get_writable_span() + { + return writable_span(m_p, m_size); + } + + private: + T* m_p; + size_t m_size; // the number of constructed objects + size_t m_capacity; // the size of the allocation + + template struct is_vector { enum { cFlag = false }; }; + template struct is_vector< vector > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, size_t num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + new ((void*)(pDst)) T(std::move(*pSrc)); + scalar_type::destruct(pSrc); + + ++pSrc; + ++pDst; + } + } + + inline bool increase_capacity(size_t min_new_capacity, bool grow_hint, bool nofail = false) + { + return reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? nullptr : object_mover, nofail); + } + }; + + template struct bitwise_movable< vector > { enum { cFlag = true }; }; + + // Hash map + // rg TODO 9/8/2024: I've upgraded this class to support 64-bit size_t, and it needs a lot more testing. + + const uint32_t SIZE_T_BITS = sizeof(size_t) * 8U; + + inline uint32_t safe_shift_left(uint32_t v, uint32_t l) + { + return (l < 32U) ? (v << l) : 0; + } + + inline uint64_t safe_shift_left(uint64_t v, uint32_t l) + { + return (l < 64U) ? 
(v << l) : 0; + } + + template + struct hasher + { + inline size_t operator() (const T& key) const { return static_cast(key); } + }; + + template + struct equal_to + { + inline bool operator()(const T& a, const T& b) const { return a == b; } + }; + + // Important: The Hasher and Equals objects must be bitwise movable! + template, typename Equals = equal_to > + class hash_map + { + public: + class iterator; + class const_iterator; + + private: + friend class iterator; + friend class const_iterator; + + enum state + { + cStateInvalid = 0, + cStateValid = 1 + }; + + enum + { + cMinHashSize = 4U + }; + + public: + typedef hash_map hash_map_type; + typedef std::pair value_type; + typedef Key key_type; + typedef Value referent_type; + typedef Hasher hasher_type; + typedef Equals equals_type; + + hash_map() : + m_num_valid(0), + m_grow_threshold(0), + m_hash_shift(SIZE_T_BITS) + { + static_assert((SIZE_T_BITS == 32) || (SIZE_T_BITS == 64), "SIZE_T_BITS must be 32 or 64"); + } + + hash_map(const hash_map& other) : + m_values(other.m_values), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold), + m_hash_shift(other.m_hash_shift), + m_hasher(other.m_hasher), + m_equals(other.m_equals) + { + static_assert((SIZE_T_BITS == 32) || (SIZE_T_BITS == 64), "SIZE_T_BITS must be 32 or 64"); + } + + hash_map(hash_map&& other) : + m_values(std::move(other.m_values)), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold), + m_hash_shift(other.m_hash_shift), + m_hasher(std::move(other.m_hasher)), + m_equals(std::move(other.m_equals)) + { + static_assert((SIZE_T_BITS == 32) || (SIZE_T_BITS == 64), "SIZE_T_BITS must be 32 or 64"); + + other.m_hash_shift = SIZE_T_BITS; + other.m_num_valid = 0; + other.m_grow_threshold = 0; + } + + hash_map& operator= (const hash_map& other) + { + if (this == &other) + return *this; + + clear(); + + m_values = other.m_values; + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + 
m_grow_threshold = other.m_grow_threshold; + m_hasher = other.m_hasher; + m_equals = other.m_equals; + + return *this; + } + + hash_map& operator= (hash_map&& other) + { + if (this == &other) + return *this; + + clear(); + + m_values = std::move(other.m_values); + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + m_grow_threshold = other.m_grow_threshold; + m_hasher = std::move(other.m_hasher); + m_equals = std::move(other.m_equals); + + other.m_hash_shift = SIZE_T_BITS; + other.m_num_valid = 0; + other.m_grow_threshold = 0; + + return *this; + } + + inline ~hash_map() + { + clear(); + } + + inline const Equals& get_equals() const { return m_equals; } + inline Equals& get_equals() { return m_equals; } + inline void set_equals(const Equals& equals) { m_equals = equals; } + + inline const Hasher& get_hasher() const { return m_hasher; } + inline Hasher& get_hasher() { return m_hasher; } + inline void set_hasher(const Hasher& hasher) { m_hasher = hasher; } + + inline void clear() + { + if (m_values.empty()) + return; + + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + size_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_values.clear_no_destruction(); + + m_hash_shift = SIZE_T_BITS; + m_num_valid = 0; + m_grow_threshold = 0; + } + + // Destroys elements/empties container but doesn't free memory. 
+ inline void reset() + { + if (!m_num_valid) + return; + + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + size_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + else if (sizeof(node) <= 16) + { + memset((void *)&m_values[0], 0, m_values.size_in_bytes()); + } + else + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + size_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_num_valid = 0; + } + + inline size_t size() + { + return m_num_valid; + } + + inline uint32_t size_u32() + { + return static_cast(m_num_valid); + } + + inline size_t get_table_size() + { + return m_values.size(); + } + + inline bool empty() + { + return !m_num_valid; + } + + inline bool reserve(size_t new_capacity) + { + if (!new_capacity) + return true; + + uint64_t new_hash_size = new_capacity; + + new_hash_size = new_hash_size * 2ULL; + + if (!helpers::is_power_of_2(new_hash_size)) + new_hash_size = helpers::next_pow2(new_hash_size); + + new_hash_size = helpers::maximum(cMinHashSize, new_hash_size); + + if (!can_fit_into_size_t(new_hash_size)) + { + assert(0); + return false; + } + + assert(new_hash_size >= new_capacity); + + if (new_hash_size <= m_values.size()) + return true; + + return rehash((size_t)new_hash_size); + } + + class iterator + { + friend class hash_map; + friend class hash_map::const_iterator; + + public: + inline iterator() : m_pTable(nullptr), m_index(0) { } + inline iterator(hash_map_type& table, size_t index) : m_pTable(&table), m_index(index) { } + inline iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline iterator& operator= (const 
iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline iterator operator++(int) + { + iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline iterator& operator++() + { + probe(); + return *this; + } + + inline value_type& operator*() const { return *get_cur(); } + inline value_type* operator->() const { return get_cur(); } + + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + + private: + hash_map_type* m_pTable; + size_t m_index; + + inline value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + class const_iterator + { + friend class hash_map; + friend class hash_map::iterator; + + public: + inline const_iterator() : m_pTable(nullptr), m_index(0) { } + inline const_iterator(const hash_map_type& table, size_t index) : m_pTable(&table), m_index(index) { } + inline const_iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + inline const_iterator(const const_iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline const_iterator& operator= (const const_iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + inline const_iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline 
const_iterator operator++(int) + { + const_iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline const_iterator& operator++() + { + probe(); + return *this; + } + + inline const value_type& operator*() const { return *get_cur(); } + inline const value_type* operator->() const { return get_cur(); } + + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + + private: + const hash_map_type* m_pTable; + size_t m_index; + + inline const value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + inline const_iterator begin() const + { + if (!m_num_valid) + return end(); + + return const_iterator(*this, find_next(std::numeric_limits::max())); + } + + inline const_iterator end() const + { + return const_iterator(*this, m_values.size()); + } + + inline iterator begin() + { + if (!m_num_valid) + return end(); + + return iterator(*this, find_next(std::numeric_limits::max())); + } + + inline iterator end() + { + return iterator(*this, m_values.size()); + } + + // insert_result.first will always point to inserted key/value (or the already existing key/value). + // insert_result.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). 
+ typedef std::pair insert_result; + + inline insert_result insert(const Key& k, const Value& v = Value()) + { + insert_result result; + if (!insert_no_grow(result, k, v)) + { + if (!try_grow()) + container_abort("hash_map::try_grow() failed"); + + // This must succeed. + if (!insert_no_grow(result, k, v)) + container_abort("hash_map::insert() failed"); + } + + return result; + } + + inline bool try_insert(insert_result& result, const Key& k, const Value& v = Value()) + { + if (!insert_no_grow(result, k, v)) + { + if (!try_grow()) + return false; + + if (!insert_no_grow(result, k, v)) + return false; + } + + return true; + } + + inline insert_result insert(Key&& k, Value&& v = Value()) + { + insert_result result; + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + { + if (!try_grow()) + container_abort("hash_map::try_grow() failed"); + + // This must succeed. + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + container_abort("hash_map::insert() failed"); + } + + return result; + } + + inline bool try_insert(insert_result& result, Key&& k, Value&& v = Value()) + { + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + { + if (!try_grow()) + return false; + + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + return false; + } + + return true; + } + + inline insert_result insert(const value_type& v) + { + return insert(v.first, v.second); + } + + inline bool try_insert(insert_result& result, const value_type& v) + { + return try_insert(result, v.first, v.second); + } + + inline insert_result insert(value_type&& v) + { + return insert(std::move(v.first), std::move(v.second)); + } + + inline bool try_insert(insert_result& result, value_type&& v) + { + return try_insert(result, std::move(v.first), std::move(v.second)); + } + + inline const_iterator find(const Key& k) const + { + return const_iterator(*this, find_index(k)); + } + + inline iterator find(const Key& k) + { + return iterator(*this, find_index(k)); + } + + 
inline bool contains(const Key& k) const + { + const size_t idx = find_index(k); + return idx != m_values.size(); + } + + inline bool erase(const Key& k) + { + size_t i = find_index(k); + + if (i >= m_values.size()) + return false; + + node* pDst = &get_node(i); + destruct_value_type(pDst); + pDst->state = cStateInvalid; + + m_num_valid--; + + for (; ; ) + { + size_t r, j = i; + + node* pSrc = pDst; + + do + { + if (!i) + { + i = m_values.size() - 1; + pSrc = &get_node(i); + } + else + { + i--; + pSrc--; + } + + if (!pSrc->state) + return true; + + r = hash_key(pSrc->first); + + } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); + + move_node(pDst, pSrc); + + pDst = pSrc; + } + } + + inline void swap(hash_map_type& other) + { + m_values.swap(other.m_values); + std::swap(m_hash_shift, other.m_hash_shift); + std::swap(m_num_valid, other.m_num_valid); + std::swap(m_grow_threshold, other.m_grow_threshold); + std::swap(m_hasher, other.m_hasher); + std::swap(m_equals, other.m_equals); + } + + private: + struct node : public value_type + { + uint8_t state; + }; + + static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) + { + if (BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy((void *)&pDst->first, &k, sizeof(Key)); + else + scalar_type::construct(&pDst->first, k); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy((void *)&pDst->second, &v, sizeof(Value)); + else + scalar_type::construct(&pDst->second, v); + } + + static inline void construct_value_type(value_type* pDst, const value_type* pSrc) + { + if ((BASISU_IS_BITWISE_COPYABLE(Key)) && (BASISU_IS_BITWISE_COPYABLE(Value))) + { + memcpy((void *)pDst, pSrc, sizeof(value_type)); + } + else + { + if (BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy((void *)&pDst->first, &pSrc->first, sizeof(Key)); + else + scalar_type::construct(&pDst->first, pSrc->first); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy((void *)&pDst->second, &pSrc->second, sizeof(Value)); + else + 
scalar_type::construct(&pDst->second, pSrc->second); + } + } + + static inline void destruct_value_type(value_type* p) + { + scalar_type::destruct(&p->first); + scalar_type::destruct(&p->second); + } + + // Moves nodes *pSrc to *pDst efficiently from one hashmap to another. + // pDst should NOT be constructed on entry. + static inline void move_node(node* pDst, node* pSrc, bool update_src_state = true) + { + assert(!pDst->state); + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + { + memcpy((void *)pDst, pSrc, sizeof(node)); + + assert(pDst->state == cStateValid); + } + else + { + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) + memcpy((void*)&pDst->first, &pSrc->first, sizeof(Key)); + else + { + new ((void*)&pDst->first) Key(std::move(pSrc->first)); + scalar_type::destruct(&pSrc->first); + } + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + memcpy((void*)&pDst->second, &pSrc->second, sizeof(Value)); + else + { + new ((void*)&pDst->second) Value(std::move(pSrc->second)); + scalar_type::destruct(&pSrc->second); + } + + pDst->state = cStateValid; + } + + if (update_src_state) + pSrc->state = cStateInvalid; + } + + struct raw_node + { + inline raw_node() + { + node* p = reinterpret_cast(this); + p->state = cStateInvalid; + } + + // In practice, this should never be called (right?). We manage destruction ourselves. 
+ inline ~raw_node() + { + node* p = reinterpret_cast(this); + if (p->state) + hash_map_type::destruct_value_type(p); + } + + inline raw_node(const raw_node& other) + { + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&other); + + if (pSrc->state) + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + else + pDst->state = cStateInvalid; + } + + inline raw_node& operator= (const raw_node& rhs) + { + if (this == &rhs) + return *this; + + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&rhs); + + if (pSrc->state) + { + if (pDst->state) + { + pDst->first = pSrc->first; + pDst->second = pSrc->second; + } + else + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + } + else if (pDst->state) + { + hash_map_type::destruct_value_type(pDst); + pDst->state = cStateInvalid; + } + + return *this; + } + + uint8_t m_bits[sizeof(node)]; + }; + + typedef basisu::vector node_vector; + + node_vector m_values; + + size_t m_num_valid; + size_t m_grow_threshold; + + uint32_t m_hash_shift; + + Hasher m_hasher; + Equals m_equals; + + inline size_t hash_key(const Key& k) const + { + assert((safe_shift_left(static_cast(1), (SIZE_T_BITS - m_hash_shift))) == m_values.size()); + + // Fibonacci hashing + if (SIZE_T_BITS == 32) + { + assert(m_hash_shift != 32); + + uint32_t hash = static_cast(m_hasher(k)); + hash = (2654435769U * hash) >> m_hash_shift; + + assert(hash < m_values.size()); + return (size_t)hash; + } + else + { + assert(m_hash_shift != 64); + + uint64_t hash = static_cast(m_hasher(k)); + hash = (0x9E3779B97F4A7C15ULL * hash) >> m_hash_shift; + + assert(hash < m_values.size()); + return (size_t)hash; + } + } + + inline const node& get_node(size_t index) const + { + return *reinterpret_cast(&m_values[index]); + } + + inline node& get_node(size_t index) + { + return *reinterpret_cast(&m_values[index]); + } + + inline state get_node_state(size_t index) const + { 
+ return static_cast(get_node(index).state); + } + + inline void set_node_state(size_t index, bool valid) + { + get_node(index).state = valid; + } + + inline bool try_grow() + { + uint64_t n = m_values.size() * 2ULL; + + if (!helpers::is_power_of_2(n)) + n = helpers::next_pow2(n); + + if (!can_fit_into_size_t(n)) + { + assert(0); + return false; + } + + return rehash(helpers::maximum(cMinHashSize, (size_t)n)); + } + + // new_hash_size must be a power of 2. + inline bool rehash(size_t new_hash_size) + { + if (!helpers::is_power_of_2((uint64_t)new_hash_size)) + { + assert(0); + return false; + } + + if (new_hash_size < m_num_valid) + { + assert(0); + return false; + } + + if (new_hash_size == m_values.size()) + return true; + + hash_map new_map; + if (!new_map.m_values.try_resize(new_hash_size)) + return false; + + new_map.m_hash_shift = SIZE_T_BITS - helpers::floor_log2i((uint64_t)new_hash_size); + assert(new_hash_size == safe_shift_left(static_cast(1), SIZE_T_BITS - new_map.m_hash_shift)); + + new_map.m_grow_threshold = std::numeric_limits::max(); + + node* pNode = reinterpret_cast(m_values.begin()); + node* pNode_end = pNode + m_values.size(); + + while (pNode != pNode_end) + { + if (pNode->state) + { + new_map.move_into(pNode); + + if (new_map.m_num_valid == m_num_valid) + break; + } + + pNode++; + } + + new_map.m_grow_threshold = new_hash_size >> 1U; + if (new_hash_size & 1) + new_map.m_grow_threshold++; + + m_values.clear_no_destruction(); + m_hash_shift = SIZE_T_BITS; + + swap(new_map); + + return true; + } + + inline size_t find_next(size_t index) const + { + index++; + + if (index >= m_values.size()) + return index; + + const node* pNode = &get_node(index); + + for (; ; ) + { + if (pNode->state) + break; + + if (++index >= m_values.size()) + break; + + pNode++; + } + + return index; + } + + inline size_t find_index(const Key& k) const + { + if (m_num_valid) + { + size_t index = hash_key(k); + const node* pNode = &get_node(index); + + if (pNode->state) + { + 
if (m_equals(pNode->first, k)) + return index; + + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (index == orig_index) + break; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + return index; + } + } + } + + return m_values.size(); + } + + inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v) + { + if (!m_values.size()) + return false; + + size_t index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + return false; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + } + } + + if (m_num_valid >= m_grow_threshold) + return false; + + construct_value_type(pNode, k, v); + + pNode->state = cStateValid; + + m_num_valid++; + assert(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; + } + + // Move user supplied key/value into a node. + static inline void move_value_type(value_type* pDst, Key&& k, Value&& v) + { + // Not checking for is MOVABLE because the caller could later destruct k and/or v (what state do we set them to?) + if (BASISU_IS_BITWISE_COPYABLE(Key)) + { + memcpy((void *)&pDst->first, (const void *)&k, sizeof(Key)); + } + else + { + new ((void*)&pDst->first) Key(std::move(k)); + // No destruction - user will do that (we don't own k). 
+ } + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + { + memcpy((void *)&pDst->second, (const void*)&v, sizeof(Value)); + } + else + { + new ((void*)&pDst->second) Value(std::move(v)); + // No destruction - user will do that (we don't own v). + } + } + + // Insert user provided k/v, by moving, into the current hash table + inline bool insert_no_grow_move(insert_result& result, Key&& k, Value&& v) + { + if (!m_values.size()) + return false; + + size_t index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + return false; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + } + } + + if (m_num_valid >= m_grow_threshold) + return false; + + move_value_type(pNode, std::move(k), std::move(v)); + + pNode->state = cStateValid; + + m_num_valid++; + assert(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; + } + + // Insert pNode by moving into the current hash table + inline void move_into(node* pNode) + { + size_t index = hash_key(pNode->first); + node* pDst_node = &get_node(index); + + if (pDst_node->state) + { + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pDst_node = &get_node(index); + } + else + { + index--; + pDst_node--; + } + + if (index == orig_index) + { + assert(false); + return; + } + + if (!pDst_node->state) + break; + } + } + + // No need to update the source node's state (it's going away) + move_node(pDst_node, pNode, false); + + m_num_valid++; + } + }; + + template + struct bitwise_movable< 
hash_map > { enum { cFlag = true }; }; + +#if BASISU_HASHMAP_TEST + extern void hash_map_test(); +#endif + + // String formatting + inline std::string string_format(const char* pFmt, ...) + { + char buf[2048]; + + va_list args; + va_start(args, pFmt); +#ifdef _WIN32 + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + va_end(args); + + return std::string(buf); + } + + enum class variant_type + { + cInvalid, + cI32, cU32, + cI64, cU64, + cFlt, cDbl, cBool, + cStrPtr, cStdStr + }; + + struct fmt_variant + { + union + { + int32_t m_i32; + uint32_t m_u32; + int64_t m_i64; + uint64_t m_u64; + float m_flt; + double m_dbl; + bool m_bool; + const char* m_pStr; + }; + + std::string m_str; + + variant_type m_type; + + inline fmt_variant() : + m_u64(0), + m_type(variant_type::cInvalid) + { + } + + inline fmt_variant(const fmt_variant& other) : + m_u64(other.m_u64), + m_str(other.m_str), + m_type(other.m_type) + { + } + + inline fmt_variant(fmt_variant&& other) : + m_u64(other.m_u64), + m_str(std::move(other.m_str)), + m_type(other.m_type) + { + other.m_type = variant_type::cInvalid; + other.m_u64 = 0; + } + + inline fmt_variant& operator= (fmt_variant&& other) + { + if (this == &other) + return *this; + + m_type = other.m_type; + m_u64 = other.m_u64; + m_str = std::move(other.m_str); + + other.m_type = variant_type::cInvalid; + other.m_u64 = 0; + + return *this; + } + + inline fmt_variant& operator= (const fmt_variant& rhs) + { + if (this == &rhs) + return *this; + + m_u64 = rhs.m_u64; + m_type = rhs.m_type; + m_str = rhs.m_str; + + return *this; + } + + inline fmt_variant(int32_t v) : m_i32(v), m_type(variant_type::cI32) { } + inline fmt_variant(uint32_t v) : m_u32(v), m_type(variant_type::cU32) { } + inline fmt_variant(int64_t v) : m_i64(v), m_type(variant_type::cI64) { } + inline fmt_variant(uint64_t v) : m_u64(v), m_type(variant_type::cU64) { } +#ifdef _MSC_VER + inline fmt_variant(unsigned long v) : m_u64(v), 
m_type(variant_type::cU64) {} + inline fmt_variant(long v) : m_i64(v), m_type(variant_type::cI64) {} +#endif + inline fmt_variant(float v) : m_flt(v), m_type(variant_type::cFlt) { } + inline fmt_variant(double v) : m_dbl(v), m_type(variant_type::cDbl) { } + inline fmt_variant(const char* pStr) : m_pStr(pStr), m_type(variant_type::cStrPtr) { } + inline fmt_variant(const std::string& str) : m_u64(0), m_str(str), m_type(variant_type::cStdStr) { } + inline fmt_variant(bool val) : m_bool(val), m_type(variant_type::cBool) { } + + bool to_string(std::string& res, std::string& fmt) const; + }; + + typedef basisu::vector fmt_variant_vec; + + bool fmt_variants(std::string& res, const char* pFmt, const fmt_variant_vec& variants); + + template + inline bool fmt_string(std::string& res, const char* pFmt, Args&&... args) + { + return fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... }); + } + + template + inline std::string fmt_string(const char* pFmt, Args&&... args) + { + std::string res; + fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... }); + return res; + } + + template + inline int fmt_printf(const char* pFmt, Args&&... args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... })) + return EOF; + + return fputs(res.c_str(), stdout); + } + + template + inline int fmt_fprintf(FILE* pFile, const char* pFmt, Args&&... args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... })) + return EOF; + + return fputs(res.c_str(), pFile); + } + + // fixed_array - zero initialized by default, operator[] is always bounds checked. 
+ template + class fixed_array + { + static_assert(N >= 1, "fixed_array size must be at least 1"); + + public: + using value_type = T; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + using iterator = T*; + using const_iterator = const T*; + + T m_data[N]; + + BASISU_FORCE_INLINE fixed_array() + { + initialize_array(); + } + + BASISU_FORCE_INLINE fixed_array(std::initializer_list list) + { + assert(list.size() <= N); + + std::size_t copy_size = std::min(list.size(), N); + std::copy_n(list.begin(), copy_size, m_data); // Copy up to min(list.size(), N) + + if (list.size() < N) + { + // Initialize the rest of the array + std::fill(m_data + copy_size, m_data + N, T{}); + } + } + + BASISU_FORCE_INLINE T& operator[](std::size_t index) + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + + BASISU_FORCE_INLINE const T& operator[](std::size_t index) const + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + + BASISU_FORCE_INLINE T* begin() { return m_data; } + BASISU_FORCE_INLINE const T* begin() const { return m_data; } + + BASISU_FORCE_INLINE T* end() { return m_data + N; } + BASISU_FORCE_INLINE const T* end() const { return m_data + N; } + + BASISU_FORCE_INLINE const T* data() const { return m_data; } + BASISU_FORCE_INLINE T* data() { return m_data; } + + BASISU_FORCE_INLINE const T& front() const { return m_data[0]; } + BASISU_FORCE_INLINE T& front() { return m_data[0]; } + + BASISU_FORCE_INLINE const T& back() const { return m_data[N - 1]; } + BASISU_FORCE_INLINE T& back() { return m_data[N - 1]; } + + BASISU_FORCE_INLINE constexpr std::size_t size() const { return N; } + + BASISU_FORCE_INLINE void clear() + { + initialize_array(); // Reinitialize the array + } + + BASISU_FORCE_INLINE void set_all(const T& value) + { + 
std::fill(m_data, m_data + N, value); + } + + BASISU_FORCE_INLINE readable_span get_readable_span() const + { + return readable_span(m_data, N); + } + + BASISU_FORCE_INLINE writable_span get_writable_span() + { + return writable_span(m_data, N); + } + + private: + BASISU_FORCE_INLINE void initialize_array() + { + if IF_CONSTEXPR (std::is_integral::value || std::is_floating_point::value) + memset(m_data, 0, sizeof(m_data)); + else + std::fill(m_data, m_data + N, T{}); + } + + BASISU_FORCE_INLINE T& access_element(std::size_t index) + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + + BASISU_FORCE_INLINE const T& access_element(std::size_t index) const + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + }; + + // 2D array + + template + class vector2D + { + typedef basisu::vector vec_type; + + uint32_t m_width, m_height; + vec_type m_values; + + public: + vector2D() : + m_width(0), + m_height(0) + { + } + + vector2D(uint32_t w, uint32_t h) : + m_width(0), + m_height(0) + { + resize(w, h); + } + + vector2D(const vector2D& other) + { + *this = other; + } + + vector2D(vector2D&& other) : + m_width(0), + m_height(0) + { + *this = std::move(other); + } + + vector2D& operator= (const vector2D& other) + { + if (this != &other) + { + m_width = other.m_width; + m_height = other.m_height; + m_values = other.m_values; + } + return *this; + } + + vector2D& operator= (vector2D&& other) + { + if (this != &other) + { + m_width = other.m_width; + m_height = other.m_height; + m_values = std::move(other.m_values); + + other.m_width = 0; + other.m_height = 0; + } + return *this; + } + + inline bool operator== (const vector2D& rhs) const + { + return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values); + } + + inline size_t size_in_bytes() const { return m_values.size_in_bytes(); } + + inline uint32_t get_width() const { return m_width; } + 
inline uint32_t get_height() const { return m_height; } + + inline uint32_t get_cols() const { return m_width; } + inline uint32_t get_rows() const { return m_height; } + + inline const T& operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } + inline T& operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } + + inline size_t size() const { return m_values.size(); } + + inline const T& operator[] (uint32_t i) const { return m_values[i]; } + inline T& operator[] (uint32_t i) { return m_values[i]; } + + inline const T& at(int x, int y) const { return (*this)((uint32_t)x, (uint32_t)y); } + inline T& at(int x, int y) { return (*this)((uint32_t)x, (uint32_t)y); } + + inline const T& at_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline T& at_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const T& at_row_col(int y, int x) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline T& at_row_col(int y, int x) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline void set_clipped(int x, int y, const T& val) + { + if ( ((uint32_t)x >= m_width) || ((uint32_t)y >= m_height) ) + return; + + m_values[x + y * m_width] = val; + } + + void clear() + { + m_width = 0; + m_height = 0; + m_values.clear(); + } + + void set_all(const T& val) + { + vector_set_all(m_values, val); + } + + inline const T* get_ptr() const { return m_values.data(); } + inline T* get_ptr() { return m_values.data(); } + + vector2D& resize(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return *this; + + const uint64_t total_vals = (uint64_t)new_width * new_height; + + if (!can_fit_into_size_t(total_vals)) + { + // What can we do? 
+ assert(0); + return *this; + } + + vec_type oldVals((size_t)total_vals); + oldVals.swap(m_values); + + const uint32_t w = minimum(m_width, new_width); + const uint32_t h = minimum(m_height, new_height); + + if ((w) && (h)) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + m_values[x + y * new_width] = oldVals[x + y * m_width]; + } + + m_width = new_width; + m_height = new_height; + + return *this; + } + + bool try_resize(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return true; + + const uint64_t total_vals = (uint64_t)new_width * new_height; + + if (!can_fit_into_size_t(total_vals)) + { + // What can we do? + assert(0); + return false; + } + + vec_type oldVals; + if (!oldVals.try_resize((size_t)total_vals)) + return false; + + oldVals.swap(m_values); + + const uint32_t w = minimum(m_width, new_width); + const uint32_t h = minimum(m_height, new_height); + + if ((w) && (h)) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + m_values[x + y * new_width] = oldVals[x + y * m_width]; + } + + m_width = new_width; + m_height = new_height; + + return true; + } + + vector2D& resize_rows_cols(uint32_t rows, uint32_t cols) + { + return resize(cols, rows); + } + + bool try_resize_rows_cols(uint32_t rows, uint32_t cols) + { + return try_resize(cols, rows); + } + + const vector2D& extract_block_clamped(T* pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + if (((src_x + w) > m_width) || ((src_y + h) > m_height)) + { + // Slower clamping case + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = at_clamped(src_x + x, src_y + y); + } + else + { + const T* pSrc = &m_values[src_x + src_y * m_width]; + + for (uint32_t y = 0; y < h; y++) + { + memcpy(pDst, pSrc, w * sizeof(T)); + pSrc += m_width; + pDst += w; + } + } + + return *this; + } + + const vector2D& extract_block_clamped(T* pDst, uint32_t src_x, uint32_t src_y, 
uint32_t w, uint32_t h, uint32_t override_height) const + { + assert(override_height && (override_height <= m_height)); + + if (((src_x + w) > m_width) || ((src_y + h) > minimum(m_height, override_height))) + { + // Slower clamping case + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = at_clamped(src_x + x, minimum(src_y + y, override_height - 1)); + } + else + { + const T* pSrc = &m_values[src_x + src_y * m_width]; + + for (uint32_t y = 0; y < h; y++) + { + memcpy(pDst, pSrc, w * sizeof(T)); + pSrc += m_width; + pDst += w; + } + } + + return *this; + } + }; + + // Explictly primitive container intended for POD's, simple usage. + // push_back() and resize() will refuse to push anymore and just return when full. + template + class static_vector + { + T m_data[N]; + uint32_t m_size; + + public: + static_vector() : m_size(0) { } + + inline void reserve(size_t reserve_size) + { + (void)(reserve_size); + + assert(reserve_size <= N); + } + + inline void push_back(const T& value) + { + // Should never happen. 
+ if (m_size >= N) + { + assert(0); + fprintf(stderr, "basisu::static_vector overflow!\n"); + return; + } + + m_data[m_size++] = value; + } + + inline std::size_t size() const { return m_size; } + inline uint32_t size_u32() const { return m_size; } + inline constexpr std::size_t capacity() const { return N; } + + inline bool empty() const { return !m_size; } + + inline T& operator[](std::size_t i) { return m_data[i]; } + inline const T& operator[](std::size_t i) const { return m_data[i]; } + + inline void resize(size_t new_size) + { + if (new_size > N) + { + assert(0); + fprintf(stderr, "basisu::static_vector overflow!\n"); + return; + } + + m_size = (uint32_t)new_size; + } + }; + +} // namespace basisu + +namespace std +{ + template + inline void swap(basisu::vector& a, basisu::vector& b) + { + a.swap(b); + } + + template + inline void swap(basisu::hash_map& a, basisu::hash_map& b) + { + a.swap(b); + } + +} // namespace std diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_containers_impl.h b/Source/ThirdParty/basis_universal/transcoder/basisu_containers_impl.h new file mode 100644 index 000000000..2ac13029d --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_containers_impl.h @@ -0,0 +1,817 @@ +// basisu_containers_impl.h +// Do not include directly + +#include + +#ifdef _MSC_VER +#pragma warning (disable:4127) // warning C4127: conditional expression is constant +#endif + +namespace basisu +{ + // A container operation has internally panicked in an unrecoverable way. + // Either an allocation has failed, or a range or consistency check has failed. +#ifdef _MSC_VER + __declspec(noreturn) +#else + [[noreturn]] +#endif + void container_abort(const char* pMsg, ...) 
+ { + assert(0); + + va_list args; + va_start(args, pMsg); + + char buf[1024] = {}; + +#ifdef _MSC_VER + vsprintf_s(buf, sizeof(buf), pMsg, args); +#else + vsnprintf(buf, sizeof(buf), pMsg, args); +#endif + va_end(args); + + fputs(buf, stderr); + + std::terminate(); + } + + bool elemental_vector::increase_capacity(size_t min_new_capacity, bool grow_hint, size_t element_size, object_mover pMover, bool nofail_flag) + { + assert(m_size <= m_capacity); + assert(min_new_capacity >= m_size); + assert(element_size); + + // Basic sanity check min_new_capacity + if (!can_fit_into_size_t((uint64_t)min_new_capacity * element_size)) + { + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: requesting too many elements\n"); + } + + // Check for sane library limits + if (sizeof(void*) == sizeof(uint64_t)) + { + // 16 GB + assert(min_new_capacity < (0x400000000ULL / element_size)); + } + else + { + // ~1.99 GB + assert(min_new_capacity < (0x7FFF0000U / element_size)); + } + + // If vector is already large enough just return. 
+ if (m_capacity >= min_new_capacity) + return true; + + uint64_t new_capacity_u64 = min_new_capacity; + + if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64))) + { + new_capacity_u64 = helpers::next_pow2(new_capacity_u64); + + if (!can_fit_into_size_t(new_capacity_u64)) + { + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: vector too large\n"); + } + } + + const uint64_t desired_size_u64 = element_size * new_capacity_u64; + + if (!can_fit_into_size_t(desired_size_u64)) + { + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: vector too large\n"); + } + + const size_t desired_size = static_cast(desired_size_u64); + + size_t actual_size = 0; + BASISU_NOTE_UNUSED(actual_size); + + if (!pMover) + { + void* new_p = realloc(m_p, desired_size); + if (!new_p) + { + fprintf(stderr, "elemental_vector::increase_capacity: Allocation failed!\n"); + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size); + } + +#if BASISU_VECTOR_DETERMINISTIC + actual_size = desired_size; +#elif defined(_MSC_VER) + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + m_p = new_p; + } + else + { + void* new_p = malloc(desired_size); + if (!new_p) + { + fprintf(stderr, "elemental_vector::increase_capacity: Allocation failed!\n"); + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size); + } + +#if BASISU_VECTOR_DETERMINISTIC + actual_size = desired_size; +#elif defined(_MSC_VER) + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + 
free(m_p); + + m_p = new_p; + } + +#if BASISU_VECTOR_DETERMINISTIC + m_capacity = static_cast(new_capacity_u64); +#else + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = static_cast(new_capacity_u64); +#endif + + return true; + } + +#if BASISU_HASHMAP_TEST + +#define HASHMAP_TEST_VERIFY(c) do { if (!(c)) handle_hashmap_test_verify_failure(__LINE__); } while(0) + + static void handle_hashmap_test_verify_failure(int line) + { + container_abort("HASHMAP_TEST_VERIFY() faild on line %i\n", line); + } + + class counted_obj + { + public: + counted_obj(uint32_t v = 0) : + m_val(v) + { + m_count++; + } + + counted_obj(const counted_obj& obj) : + m_val(obj.m_val) + { + if (m_val != UINT64_MAX) + m_count++; + } + + counted_obj(counted_obj&& obj) : + m_val(obj.m_val) + { + obj.m_val = UINT64_MAX; + } + + counted_obj& operator= (counted_obj&& rhs) + { + if (this != &rhs) + { + m_val = rhs.m_val; + rhs.m_val = UINT64_MAX; + } + return *this; + } + + ~counted_obj() + { + if (m_val != UINT64_MAX) + { + assert(m_count > 0); + m_count--; + } + } + + static uint32_t m_count; + + uint64_t m_val; + + operator size_t() const { return (size_t)m_val; } + + bool operator== (const counted_obj& rhs) const { return m_val == rhs.m_val; } + bool operator== (const uint32_t rhs) const { return m_val == rhs; } + + }; + + uint32_t counted_obj::m_count; + + static uint32_t urand32() + { + uint32_t a = rand(); + uint32_t b = rand() << 15; + uint32_t c = rand() << (32 - 15); + return a ^ b ^ c; + } + + static int irand32(int l, int h) + { + assert(l < h); + if (l >= h) + return l; + + uint32_t range = static_cast(h - l); + + uint32_t rnd = urand32(); + + uint32_t rnd_range = static_cast((((uint64_t)range) * ((uint64_t)rnd)) >> 32U); + + int result = l + rnd_range; + assert((result >= l) && (result < h)); + return result; + } + + void hash_map_test() + { + { + basisu::hash_map s; + uint_vec k; + + for (uint32_t i = 0; i < 1000000; i++) + { + 
s.insert(i); + k.push_back(i); + } + + for (uint32_t i = 0; i < k.size(); i++) + { + uint32_t r = rand() ^ (rand() << 15); + + uint32_t j = i + (r % (k.size() - i)); + + std::swap(k[i], k[j]); + } + + basisu::hash_map s1(s); + + for (uint32_t i = 0; i < 1000000; i++) + { + auto res = s.find(i); + HASHMAP_TEST_VERIFY(res != s.end()); + HASHMAP_TEST_VERIFY(res->first == i); + s.erase(i); + } + + for (uint32_t it = 0; it < 1000000; it++) + { + uint32_t i = k[it]; + + auto res = s1.find(i); + HASHMAP_TEST_VERIFY(res != s.end()); + HASHMAP_TEST_VERIFY(res->first == i); + s1.erase(i); + } + + for (uint32_t i = 0; i < 1000000; i++) + { + auto res = s.find(i); + HASHMAP_TEST_VERIFY(res == s.end()); + + auto res1 = s1.find(i); + HASHMAP_TEST_VERIFY(res1 == s1.end()); + } + + HASHMAP_TEST_VERIFY(s.empty()); + HASHMAP_TEST_VERIFY(s1.empty()); + } + + { + typedef basisu::hash_map< uint32_t, basisu::vector > hm; + hm q; + + basisu::vector a, b; + a.push_back(1); + b.push_back(2); + b.push_back(3); + + basisu::vector c(b); + + hm::insert_result ir; + q.try_insert(ir, 1, std::move(a)); + q.try_insert(ir, 2, std::move(b)); + q.try_insert(ir, std::make_pair(3, c)); + } + + { + typedef basisu::hash_map my_hash_map; + my_hash_map m; + counted_obj a, b; + m.insert(std::move(a), std::move(b)); + } + + { + basisu::hash_map k; + basisu::hash_map l; + std::swap(k, l); + + k.begin(); + k.end(); + k.clear(); + k.empty(); + k.erase(0); + k.insert(0, 1); + k.find(0); + k.get_equals(); + k.get_hasher(); + k.get_table_size(); + k.reset(); + k.reserve(1); + k = l; + k.set_equals(l.get_equals()); + k.set_hasher(l.get_hasher()); + k.get_table_size(); + } + + uint32_t seed = 0; + for (; ; ) + { + seed++; + + typedef basisu::hash_map my_hash_map; + my_hash_map m; + + const uint32_t n = irand32(1, 100000); + + printf("%u\n", n); + + srand(seed); // r1.seed(seed); + + basisu::vector q; + + uint32_t count = 0; + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + 
my_hash_map::insert_result res = m.insert(counted_obj(v), counted_obj(v ^ 0xdeadbeef)); + if (res.second) + { + count++; + q.push_back(v); + } + } + + HASHMAP_TEST_VERIFY(m.size() == count); + + srand(seed); + + my_hash_map cm(m); + m.clear(); + m = cm; + cm.reset(); + + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == v); + HASHMAP_TEST_VERIFY(it->second == (v ^ 0xdeadbeef)); + } + + for (uint32_t t = 0; t < 2; t++) + { + const uint32_t nd = irand32(1, q.size_u32() + 1); + for (uint32_t i = 0; i < nd; i++) + { + uint32_t p = irand32(0, q.size_u32()); + + int k = q[p]; + if (k >= 0) + { + q[p] = -k - 1; + + bool s = m.erase(counted_obj(k)); + HASHMAP_TEST_VERIFY(s); + } + } + + typedef basisu::hash_map uint_hash_set; + uint_hash_set s; + + for (uint32_t i = 0; i < q.size(); i++) + { + int v = q[i]; + + if (v >= 0) + { + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == (uint32_t)v); + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)v ^ 0xdeadbeef)); + + s.insert(v); + } + else + { + my_hash_map::const_iterator it = m.find(counted_obj(-v - 1)); + HASHMAP_TEST_VERIFY(it == m.end()); + } + } + + uint32_t found_count = 0; + for (my_hash_map::const_iterator it = m.begin(); it != m.end(); ++it) + { + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)it->first ^ 0xdeadbeef)); + + uint_hash_set::const_iterator fit(s.find((uint32_t)it->first)); + HASHMAP_TEST_VERIFY(fit != s.end()); + + HASHMAP_TEST_VERIFY(fit->first == it->first); + + found_count++; + } + + HASHMAP_TEST_VERIFY(found_count == s.size()); + } + + HASHMAP_TEST_VERIFY(counted_obj::m_count == m.size() * 2); + } + } + +#endif // BASISU_HASHMAP_TEST + + // String formatting + + bool fmt_variant::to_string(std::string& res, std::string& fmt) const + { + res.resize(0); + + // Scan for 
allowed formatting characters. + for (size_t i = 0; i < fmt.size(); i++) + { + const char c = fmt[i]; + + if (isdigit(c) || (c == '.') || (c == ' ') || (c == '#') || (c == '+') || (c == '-')) + continue; + + if (isalpha(c)) + { + if ((i + 1) == fmt.size()) + continue; + } + + return false; + } + + if (fmt.size() && (fmt.back() == 'c')) + { + if ((m_type == variant_type::cI32) || (m_type == variant_type::cU32)) + { + if (m_u32 > 255) + return false; + + // Explictly allowing caller to pass in a char of 0, which is ignored. + if (m_u32) + res.push_back((uint8_t)m_u32); + return true; + } + else + return false; + } + + switch (m_type) + { + case variant_type::cInvalid: + { + return false; + } + case variant_type::cI32: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'x') && (e != 'X') && (e != 'i') && (e != 'd') && (e != 'u')) + return false; + } + else + { + fmt += "i"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_i32); + } + else + { + res = string_format("%i", m_i32); + } + break; + } + case variant_type::cU32: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'x') && (e != 'X') && (e != 'i') && (e != 'd') && (e != 'u')) + return false; + } + else + { + fmt += "u"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_u32); + } + else + { + res = string_format("%u", m_u32); + } + break; + } + case variant_type::cI64: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if (e == 'x') + { + fmt.pop_back(); + fmt += PRIx64; + } + else if (e == 'X') + { + fmt.pop_back(); + fmt += PRIX64; + } + else + return false; + } + else + { + fmt += PRId64; + } + + res = string_format((std::string("%") + fmt).c_str(), m_i64); + } + else + { + res = string_format("%" PRId64, m_i64); + } + break; + } + case variant_type::cU64: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if (e == 'x') + { + fmt.pop_back(); + fmt += PRIx64; + } + else if (e == 
'X') + { + fmt.pop_back(); + fmt += PRIX64; + } + else + return false; + } + else + { + fmt += PRIu64; + } + + res = string_format((std::string("%") + fmt).c_str(), m_u64); + } + else + { + res = string_format("%" PRIu64, m_u64); + } + break; + } + case variant_type::cFlt: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'f') && (e != 'g') && (e != 'e') && (e != 'E')) + return false; + } + else + { + fmt += "f"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_flt); + } + else + { + res = string_format("%f", m_flt); + } + break; + } + case variant_type::cDbl: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'f') && (e != 'g') && (e != 'e') && (e != 'E')) + return false; + } + else + { + fmt += "f"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_dbl); + } + else + { + res = string_format("%f", m_dbl); + } + break; + } + case variant_type::cStrPtr: + { + if (fmt.size()) + return false; + if (!m_pStr) + return false; + res = m_pStr; + break; + } + case variant_type::cBool: + { + if (fmt.size()) + return false; + res = m_bool ? 
"true" : "false"; + break; + } + case variant_type::cStdStr: + { + if (fmt.size()) + return false; + res = m_str; + break; + } + default: + { + return false; + } + } + + return true; + } + + bool fmt_variants(std::string& res, const char* pFmt, const fmt_variant_vec& variants) + { + res.resize(0); + + // Must specify a format string + if (!pFmt) + { + assert(0); + return false; + } + + // Check format string's length + const size_t fmt_len = strlen(pFmt); + if (!fmt_len) + { + if (variants.size()) + { + assert(0); + return false; + } + return true; + } + + // Wildly estimate output length + res.reserve(fmt_len + 32); + + std::string var_fmt; + var_fmt.reserve(16); + + std::string tmp; + tmp.reserve(16); + + size_t variant_index = 0; + bool inside_brackets = false; + const char* p = pFmt; + + while (*p) + { + const uint8_t c = *p++; + + if (inside_brackets) + { + if (c == '}') + { + inside_brackets = false; + + if (variant_index >= variants.size()) + { + assert(0); + return false; + } + + if (!variants[variant_index].to_string(tmp, var_fmt)) + { + assert(0); + return false; + } + + res += tmp; + + variant_index++; + } + else + { + // Check for forbidden formatting characters. 
+ if ((c == '*') || (c == 'n') || (c == '%')) + { + assert(0); + return false; + } + + var_fmt.push_back(c); + } + } + else if (c == '{') + { + // Check for escaped '{' + if (*p == '{') + { + res.push_back((char)c); + p++; + } + else + { + inside_brackets = true; + var_fmt.resize(0); + } + } + else + { + res.push_back((char)c); + } + } + + if (inside_brackets) + { + assert(0); + return false; + } + + if (variant_index != variants.size()) + { + assert(0); + return false; + } + + return true; + } + +} // namespace basisu diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_file_headers.h b/Source/ThirdParty/basis_universal/transcoder/basisu_file_headers.h new file mode 100644 index 000000000..10462774f --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_file_headers.h @@ -0,0 +1,245 @@ +// basis_file_headers.h +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_transcoder_internal.h" + +namespace basist +{ + // Slice desc header flags + enum basis_slice_desc_flags + { + cSliceDescFlagsHasAlpha = 1, + + // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) + // Currently the first frame is always an I-Frame, all subsequent frames are P-Frames. This will eventually be changed to periodic I-Frames. 
+ cSliceDescFlagsFrameIsIFrame = 2 + }; + +#pragma pack(push) +#pragma pack(1) + struct basis_slice_desc + { + basisu::packed_uint<3> m_image_index; // The index of the source image provided to the encoder (will always appear in order from first to last, first image index is 0, no skipping allowed) + basisu::packed_uint<1> m_level_index; // The mipmap level index (mipmaps will always appear from largest to smallest) + basisu::packed_uint<1> m_flags; // enum basis_slice_desc_flags + + basisu::packed_uint<2> m_orig_width; // The original image width (may not be a multiple of 4 pixels) + basisu::packed_uint<2> m_orig_height; // The original image height (may not be a multiple of 4 pixels) + + basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 or 6x6 pixels. The slice's pixel resolution may or may not be a power of 2. + basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. + + basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data + basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes + + basisu::packed_uint<2> m_slice_data_crc16; // The CRC16 of the compressed slice data, for extra-paranoid use cases + }; + + // File header files + enum basis_header_flags + { + // Always set for ETC1S files. Not set for UASTC files. + cBASISHeaderFlagETC1S = 1, + + // Set if the texture had to be Y flipped before encoding. The actual interpretation of this (is Y up or down?) is up to the user. + cBASISHeaderFlagYFlipped = 2, + + // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) + cBASISHeaderFlagHasAlphaSlices = 4, + + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + cBASISHeaderFlagUsesGlobalCodebook = 8, + + // Set if the texture data is sRGB, otherwise it's linear. + // In reality, we have no idea if the texture data is actually linear or sRGB. 
This is the m_perceptual parameter passed to the compressor. + cBASISHeaderFlagSRGB = 16, + }; + + // The image type field attempts to describe how to interpret the image data in a Basis file. + // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. + // We do make sure the various constraints are followed (2DArray/cubemap/videoframes/volume implies that each image has the same resolution and # of mipmap levels, etc., cubemap implies that the # of image slices is a multiple of 6) + enum basis_texture_type + { + cBASISTexType2D = 0, // An arbitrary array of 2D RGB or RGBA images with optional mipmaps, array size = # images, each image may have a different resolution and # of mipmap levels + cBASISTexType2DArray = 1, // An array of 2D RGB or RGBA images with optional mipmaps, array size = # images, each image has the same resolution and mipmap levels + cBASISTexTypeCubemapArray = 2, // an array of cubemap levels, total # of images must be divisable by 6, in X+, X-, Y+, Y-, Z+, Z- order, with optional mipmaps + cBASISTexTypeVideoFrames = 3, // An array of 2D video frames, with optional mipmaps, # frames = # images, each image has the same resolution and # of mipmap levels + cBASISTexTypeVolume = 4, // A 3D texture with optional mipmaps, Z dimension = # images, each image has the same resolution and # of mipmap levels + + cBASISTexTypeTotal + }; + + enum + { + cBASISMaxUSPerFrame = 0xFFFFFF + }; + + enum class basis_tex_format + { + // Original LDR formats + cETC1S = 0, + cUASTC_LDR_4x4 = 1, + + // HDR formats + cUASTC_HDR_4x4 = 2, + cASTC_HDR_6x6 = 3, + cUASTC_HDR_6x6_INTERMEDIATE = 4, // TODO: rename to UASTC_HDR_6x6 + + // XUASTC (supercompressed) LDR variants (the standard ASTC block sizes) + cXUASTC_LDR_4x4 = 5, + cXUASTC_LDR_5x4 = 6, + cXUASTC_LDR_5x5 = 7, + cXUASTC_LDR_6x5 = 8, + + cXUASTC_LDR_6x6 = 9, + cXUASTC_LDR_8x5 = 10, + cXUASTC_LDR_8x6 = 11, + cXUASTC_LDR_10x5 = 
12, + + cXUASTC_LDR_10x6 = 13, + cXUASTC_LDR_8x8 = 14, + cXUASTC_LDR_10x8 = 15, + cXUASTC_LDR_10x10 = 16, + + cXUASTC_LDR_12x10 = 17, + cXUASTC_LDR_12x12 = 18, + + // Standard (non-supercompressed) ASTC LDR variants (the standard ASTC block sizes) + cASTC_LDR_4x4 = 19, + cASTC_LDR_5x4 = 20, + cASTC_LDR_5x5 = 21, + cASTC_LDR_6x5 = 22, + + cASTC_LDR_6x6 = 23, + cASTC_LDR_8x5 = 24, + cASTC_LDR_8x6 = 25, + cASTC_LDR_10x5 = 26, + + cASTC_LDR_10x6 = 27, + cASTC_LDR_8x8 = 28, + cASTC_LDR_10x8 = 29, + cASTC_LDR_10x10 = 30, + + cASTC_LDR_12x10 = 31, + cASTC_LDR_12x12 = 32, + + cTotalFormats + }; + + // True if the basis_tex_format is XUASTC LDR 4x4-12x12. + inline bool basis_tex_format_is_xuastc_ldr(basis_tex_format tex_fmt) + { + return ((uint32_t)tex_fmt >= (uint32_t)basis_tex_format::cXUASTC_LDR_4x4) && ((uint32_t)tex_fmt <= (uint32_t)basis_tex_format::cXUASTC_LDR_12x12); + } + + // True if the basis_tex_format is ASTC LDR 4x4-12x12. + inline bool basis_tex_format_is_astc_ldr(basis_tex_format tex_fmt) + { + return ((uint32_t)tex_fmt >= (uint32_t)basis_tex_format::cASTC_LDR_4x4) && ((uint32_t)tex_fmt <= (uint32_t)basis_tex_format::cASTC_LDR_12x12); + } + + inline void get_basis_tex_format_block_size(basis_tex_format tex_fmt, uint32_t &width, uint32_t &height) + { + switch (tex_fmt) + { + case basis_tex_format::cETC1S: width = 4; height = 4; break; + case basis_tex_format::cUASTC_LDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cUASTC_HDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cASTC_HDR_6x6: width = 6; height = 6; break; + case basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: width = 6; height = 6; break; + case basis_tex_format::cXUASTC_LDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cXUASTC_LDR_5x4: width = 5; height = 4; break; + case basis_tex_format::cXUASTC_LDR_5x5: width = 5; height = 5; break; + case basis_tex_format::cXUASTC_LDR_6x5: width = 6; height = 5; break; + case basis_tex_format::cXUASTC_LDR_6x6: width = 6; 
height = 6; break; + case basis_tex_format::cXUASTC_LDR_8x5: width = 8; height = 5; break; + case basis_tex_format::cXUASTC_LDR_8x6: width = 8; height = 6; break; + case basis_tex_format::cXUASTC_LDR_10x5: width = 10; height = 5; break; + case basis_tex_format::cXUASTC_LDR_10x6: width = 10; height = 6; break; + case basis_tex_format::cXUASTC_LDR_8x8: width = 8; height = 8; break; + case basis_tex_format::cXUASTC_LDR_10x8: width = 10; height = 8; break; + case basis_tex_format::cXUASTC_LDR_10x10: width = 10; height = 10; break; + case basis_tex_format::cXUASTC_LDR_12x10: width = 12; height = 10; break; + case basis_tex_format::cXUASTC_LDR_12x12: width = 12; height = 12; break; + case basis_tex_format::cASTC_LDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cASTC_LDR_5x4: width = 5; height = 4; break; + case basis_tex_format::cASTC_LDR_5x5: width = 5; height = 5; break; + case basis_tex_format::cASTC_LDR_6x5: width = 6; height = 5; break; + case basis_tex_format::cASTC_LDR_6x6: width = 6; height = 6; break; + case basis_tex_format::cASTC_LDR_8x5: width = 8; height = 5; break; + case basis_tex_format::cASTC_LDR_8x6: width = 8; height = 6; break; + case basis_tex_format::cASTC_LDR_10x5: width = 10; height = 5; break; + case basis_tex_format::cASTC_LDR_10x6: width = 10; height = 6; break; + case basis_tex_format::cASTC_LDR_8x8: width = 8; height = 8; break; + case basis_tex_format::cASTC_LDR_10x8: width = 10; height = 8; break; + case basis_tex_format::cASTC_LDR_10x10: width = 10; height = 10; break; + case basis_tex_format::cASTC_LDR_12x10: width = 12; height = 10; break; + case basis_tex_format::cASTC_LDR_12x12: width = 12; height = 12; break; + default: + assert(0); + width = 0; + height = 0; + break; + } + } + + struct basis_file_header + { + enum + { + cBASISSigValue = ('B' << 8) | 's', + cBASISFirstVersion = 0x10 + }; + + basisu::packed_uint<2> m_sig; // 2 byte file signature + basisu::packed_uint<2> m_ver; // Baseline file version + 
basisu::packed_uint<2> m_header_size; // Header size in bytes, sizeof(basis_file_header) + basisu::packed_uint<2> m_header_crc16; // CRC16 of the remaining header data + + basisu::packed_uint<4> m_data_size; // The total size of all data after the header + basisu::packed_uint<2> m_data_crc16; // The CRC16 of all data after the header + + basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) + + basisu::packed_uint<3> m_total_images; // The total # of images + + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format + basisu::packed_uint<2> m_flags; // enum basist::header_flags + basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type + basisu::packed_uint<3> m_us_per_frame; // Framerate of video, in microseconds per frame + + basisu::packed_uint<4> m_reserved; // For future use + basisu::packed_uint<4> m_userdata0; // For client use + basisu::packed_uint<4> m_userdata1; // For client use + + basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook + basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file + basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes + + basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook + basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file + basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes + + basisu::packed_uint<4> m_tables_file_ofs; // The file offset of the compressed Huffman codelength tables, for decompressing slices + basisu::packed_uint<4> m_tables_file_size; // The file size in bytes of the compressed huffman codelength tables + + basisu::packed_uint<4> m_slice_desc_file_ofs; // The file offset to the slice 
description array, usually follows the header + + basisu::packed_uint<4> m_extended_file_ofs; // The file offset of the "extended" header and compressed data, for future use + basisu::packed_uint<4> m_extended_file_size; // The file size in bytes of the "extended" header and compressed data, for future use + }; +#pragma pack (pop) + +} // namespace basist diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_idct.h b/Source/ThirdParty/basis_universal/transcoder/basisu_idct.h new file mode 100644 index 000000000..33b77d001 --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_idct.h @@ -0,0 +1,1446 @@ +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 2, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_2( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 7.071067691e-01f * v; + s1 += 7.071067691e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 7.071067691e-01f * v; + s1 += -7.071067691e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 3, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_3( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 5.773502588e-01f * v; + s1 += 5.773502588e-01f * v; + s2 += 5.773502588e-01f * v; + } + } + + { + float v = src[1 * 
src_stride]; + if (v != 0.0f) + { + s0 += 7.071067691e-01f * v; + s2 += -7.071068883e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082482755e-01f * v; + s1 += -8.164966106e-01f * v; + s2 += 4.082486033e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 4, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_4( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 5.000000000e-01f * v; + s1 += 5.000000000e-01f * v; + s2 += 5.000000000e-01f * v; + s3 += 5.000000000e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 6.532814503e-01f * v; + s1 += 2.705980539e-01f * v; + s2 += -2.705981135e-01f * v; + s3 += -6.532815099e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.999999702e-01f * v; + s1 += -4.999999702e-01f * v; + s2 += -4.999999106e-01f * v; + s3 += 5.000001788e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 2.705980539e-01f * v; + s1 += -6.532814503e-01f * v; + s2 += 6.532815099e-01f * v; + s3 += -2.705983818e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 5, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_5( + const 
float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 4.472135901e-01f * v; + s1 += 4.472135901e-01f * v; + s2 += 4.472135901e-01f * v; + s3 += 4.472135901e-01f * v; + s4 += 4.472135901e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 6.015009880e-01f * v; + s1 += 3.717480302e-01f * v; + s3 += -3.717481494e-01f * v; + s4 += -6.015009284e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 5.116672516e-01f * v; + s1 += -1.954395324e-01f * v; + s2 += -6.324555278e-01f * v; + s3 += -1.954392791e-01f * v; + s4 += 5.116672516e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.717480302e-01f * v; + s1 += -6.015009284e-01f * v; + s3 += 6.015008688e-01f * v; + s4 += -3.717483282e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 1.954394877e-01f * v; + s1 += -5.116672516e-01f * v; + s2 += 6.324555278e-01f * v; + s3 += -5.116675496e-01f * v; + s4 += 1.954394132e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 6, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_6( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082483053e-01f * v; + s1 += 4.082483053e-01f * v; + s2 += 4.082483053e-01f * v; + s3 += 
4.082483053e-01f * v; + s4 += 4.082483053e-01f * v; + s5 += 4.082483053e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 5.576775074e-01f * v; + s1 += 4.082482755e-01f * v; + s2 += 1.494291872e-01f * v; + s3 += -1.494293064e-01f * v; + s4 += -4.082482755e-01f * v; + s5 += -5.576775670e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.999999702e-01f * v; + s2 += -5.000000596e-01f * v; + s3 += -4.999999106e-01f * v; + s5 += 5.000000596e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082482755e-01f * v; + s1 += -4.082482755e-01f * v; + s2 += -4.082483053e-01f * v; + s3 += 4.082484245e-01f * v; + s4 += 4.082480669e-01f * v; + s5 += -4.082485437e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 2.886750996e-01f * v; + s1 += -5.773502588e-01f * v; + s2 += 2.886753380e-01f * v; + s3 += 2.886748910e-01f * v; + s4 += -5.773502588e-01f * v; + s5 += 2.886753976e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 1.494291872e-01f * v; + s1 += -4.082483053e-01f * v; + s2 += 5.576775074e-01f * v; + s3 += -5.576776266e-01f * v; + s4 += 4.082483053e-01f * v; + s5 += -1.494295001e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 7, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_7( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + + { + float v = src[0 * 
src_stride]; + if (v != 0.0f) + { + s0 += 3.779644668e-01f * v; + s1 += 3.779644668e-01f * v; + s2 += 3.779644668e-01f * v; + s3 += 3.779644668e-01f * v; + s4 += 3.779644668e-01f * v; + s5 += 3.779644668e-01f * v; + s6 += 3.779644668e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 5.211208463e-01f * v; + s1 += 4.179065228e-01f * v; + s2 += 2.319205552e-01f * v; + s4 += -2.319206595e-01f * v; + s5 += -4.179066122e-01f * v; + s6 += -5.211208463e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.815880954e-01f * v; + s1 += 1.189424619e-01f * v; + s2 += -3.332694173e-01f * v; + s3 += -5.345224738e-01f * v; + s4 += -3.332692087e-01f * v; + s5 += 1.189427450e-01f * v; + s6 += 4.815880954e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 4.179065228e-01f * v; + s1 += -2.319206595e-01f * v; + s2 += -5.211208463e-01f * v; + s4 += 5.211208463e-01f * v; + s5 += 2.319205403e-01f * v; + s6 += -4.179067314e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.332692981e-01f * v; + s1 += -4.815880954e-01f * v; + s2 += -1.189422309e-01f * v; + s3 += 5.345224738e-01f * v; + s4 += -1.189426631e-01f * v; + s5 += -4.815878570e-01f * v; + s6 += 3.332692981e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 2.319205552e-01f * v; + s1 += -5.211208463e-01f * v; + s2 += 4.179064631e-01f * v; + s4 += -4.179064035e-01f * v; + s5 += 5.211209059e-01f * v; + s6 += -2.319207191e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 1.189424619e-01f * v; + s1 += -3.332692087e-01f * v; + s2 += 4.815881252e-01f * v; + s3 += -5.345224738e-01f * v; + s4 += 4.815881550e-01f * v; + s5 += -3.332694471e-01f * v; + s6 += 1.189431697e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 
* dst_stride] = s5; + dst[6 * dst_stride] = s6; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 8, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_8( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.535533845e-01f * v; + s1 += 3.535533845e-01f * v; + s2 += 3.535533845e-01f * v; + s3 += 3.535533845e-01f * v; + s4 += 3.535533845e-01f * v; + s5 += 3.535533845e-01f * v; + s6 += 3.535533845e-01f * v; + s7 += 3.535533845e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.903926253e-01f * v; + s1 += 4.157347977e-01f * v; + s2 += 2.777850926e-01f * v; + s3 += 9.754511714e-02f * v; + s4 += -9.754516184e-02f * v; + s5 += -2.777851820e-01f * v; + s6 += -4.157348275e-01f * v; + s7 += -4.903926551e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.619397521e-01f * v; + s1 += 1.913417131e-01f * v; + s2 += -1.913417578e-01f * v; + s3 += -4.619398117e-01f * v; + s4 += -4.619397521e-01f * v; + s5 += -1.913415641e-01f * v; + s6 += 1.913418025e-01f * v; + s7 += 4.619397819e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 4.157347977e-01f * v; + s1 += -9.754516184e-02f * v; + s2 += -4.903926551e-01f * v; + s3 += -2.777850032e-01f * v; + s4 += 2.777852118e-01f * v; + s5 += 4.903926253e-01f * v; + s6 += 9.754503518e-02f * v; + s7 += -4.157348871e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.535533845e-01f * v; + s1 += -3.535533845e-01f * v; + s2 += -3.535533249e-01f * v; + s3 += 
3.535535038e-01f * v; + s4 += 3.535533845e-01f * v; + s5 += -3.535536230e-01f * v; + s6 += -3.535532653e-01f * v; + s7 += 3.535534143e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 2.777850926e-01f * v; + s1 += -4.903926551e-01f * v; + s2 += 9.754520655e-02f * v; + s3 += 4.157346785e-01f * v; + s4 += -4.157348871e-01f * v; + s5 += -9.754510969e-02f * v; + s6 += 4.903926551e-01f * v; + s7 += -2.777854204e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 1.913417131e-01f * v; + s1 += -4.619397521e-01f * v; + s2 += 4.619397819e-01f * v; + s3 += -1.913419515e-01f * v; + s4 += -1.913414896e-01f * v; + s5 += 4.619396627e-01f * v; + s6 += -4.619398713e-01f * v; + s7 += 1.913419515e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 9.754511714e-02f * v; + s1 += -2.777850032e-01f * v; + s2 += 4.157346785e-01f * v; + s3 += -4.903925955e-01f * v; + s4 += 4.903927147e-01f * v; + s5 += -4.157347977e-01f * v; + s6 += 2.777855694e-01f * v; + s7 += -9.754577279e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 9, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_9( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.333333433e-01f * v; + s1 += 3.333333433e-01f * v; + s2 += 
3.333333433e-01f * v; + s3 += 3.333333433e-01f * v; + s4 += 3.333333433e-01f * v; + s5 += 3.333333433e-01f * v; + s6 += 3.333333433e-01f * v; + s7 += 3.333333433e-01f * v; + s8 += 3.333333433e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.642428160e-01f * v; + s1 += 4.082482755e-01f * v; + s2 += 3.030129671e-01f * v; + s3 += 1.612297893e-01f * v; + s5 += -1.612298936e-01f * v; + s6 += -3.030129969e-01f * v; + s7 += -4.082482755e-01f * v; + s8 += -4.642428458e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.429753423e-01f * v; + s1 += 2.357022464e-01f * v; + s2 += -8.185859025e-02f * v; + s3 += -3.611168861e-01f * v; + s4 += -4.714045227e-01f * v; + s5 += -3.611167669e-01f * v; + s6 += -8.185851574e-02f * v; + s7 += 2.357022166e-01f * v; + s8 += 4.429753721e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082482755e-01f * v; + s2 += -4.082482755e-01f * v; + s3 += -4.082482159e-01f * v; + s5 += 4.082483649e-01f * v; + s6 += 4.082482755e-01f * v; + s8 += -4.082485437e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.611168265e-01f * v; + s1 += -2.357022911e-01f * v; + s2 += -4.429753125e-01f * v; + s3 += 8.185874671e-02f * v; + s4 += 4.714045227e-01f * v; + s5 += 8.185835928e-02f * v; + s6 += -4.429753721e-01f * v; + s7 += -2.357023507e-01f * v; + s8 += 3.611169457e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.030129671e-01f * v; + s1 += -4.082482755e-01f * v; + s2 += -1.612298042e-01f * v; + s3 += 4.642428458e-01f * v; + s5 += -4.642428160e-01f * v; + s6 += 1.612296849e-01f * v; + s7 += 4.082482159e-01f * v; + s8 += -3.030129373e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.357022464e-01f * v; + s1 += -4.714045227e-01f * v; + s2 += 2.357022166e-01f * v; + s3 += 2.357020825e-01f * v; + s4 += -4.714045227e-01f * v; + s5 += 
2.357024848e-01f * v; + s6 += 2.357022017e-01f * v; + s7 += -4.714045227e-01f * v; + s8 += 2.357031256e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 1.612297893e-01f * v; + s1 += -4.082482159e-01f * v; + s2 += 4.642428458e-01f * v; + s3 += -3.030130565e-01f * v; + s5 += 3.030129075e-01f * v; + s6 += -4.642427862e-01f * v; + s7 += 4.082485735e-01f * v; + s8 += -1.612301171e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 8.185850084e-02f * v; + s1 += -2.357022166e-01f * v; + s2 += 3.611166775e-01f * v; + s3 += -4.429752231e-01f * v; + s4 += 4.714045227e-01f * v; + s5 += -4.429754615e-01f * v; + s6 += 3.611168563e-01f * v; + s7 += -2.357021123e-01f * v; + s8 += 8.185899258e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 10, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_10( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + float s9 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.162277639e-01f * v; + s1 += 3.162277639e-01f * v; + s2 += 3.162277639e-01f * v; + s3 += 3.162277639e-01f * v; + s4 += 3.162277639e-01f * v; + s5 += 3.162277639e-01f * v; + s6 += 3.162277639e-01f * v; + s7 += 3.162277639e-01f * v; + s8 += 3.162277639e-01f * v; + s9 += 3.162277639e-01f * v; + } + } + + { + float v = src[1 * 
src_stride]; + if (v != 0.0f) + { + s0 += 4.417076707e-01f * v; + s1 += 3.984702229e-01f * v; + s2 += 3.162277639e-01f * v; + s3 += 2.030306906e-01f * v; + s4 += 6.995963305e-02f * v; + s5 += -6.995966285e-02f * v; + s6 += -2.030307651e-01f * v; + s7 += -3.162277639e-01f * v; + s8 += -3.984702528e-01f * v; + s9 += -4.417076707e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.253254235e-01f * v; + s1 += 2.628655434e-01f * v; + s3 += -2.628656328e-01f * v; + s4 += -4.253253937e-01f * v; + s5 += -4.253253639e-01f * v; + s6 += -2.628654838e-01f * v; + s8 += 2.628656626e-01f * v; + s9 += 4.253254235e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.984702229e-01f * v; + s1 += 6.995963305e-02f * v; + s2 += -3.162277639e-01f * v; + s3 += -4.417076409e-01f * v; + s4 += -2.030306011e-01f * v; + s5 += 2.030307949e-01f * v; + s6 += 4.417076707e-01f * v; + s7 += 3.162277639e-01f * v; + s8 += -6.995979697e-02f * v; + s9 += -3.984701931e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.618034124e-01f * v; + s1 += -1.381966174e-01f * v; + s2 += -4.472135901e-01f * v; + s3 += -1.381964386e-01f * v; + s4 += 3.618033826e-01f * v; + s5 += 3.618032932e-01f * v; + s6 += -1.381967962e-01f * v; + s7 += -4.472135901e-01f * v; + s8 += -1.381963789e-01f * v; + s9 += 3.618034124e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.162277639e-01f * v; + s1 += -3.162277639e-01f * v; + s2 += -3.162277043e-01f * v; + s3 += 3.162278533e-01f * v; + s4 += 3.162277639e-01f * v; + s5 += -3.162276745e-01f * v; + s6 += -3.162276447e-01f * v; + s7 += 3.162280619e-01f * v; + s8 += 3.162278533e-01f * v; + s9 += -3.162281811e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.628655434e-01f * v; + s1 += -4.253253937e-01f * v; + s3 += 4.253253639e-01f * v; + s4 += -2.628657520e-01f * v; + s5 += -2.628654242e-01f * v; + s6 += 
4.253254235e-01f * v; + s8 += -4.253252745e-01f * v; + s9 += 2.628654540e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 2.030306906e-01f * v; + s1 += -4.417076409e-01f * v; + s2 += 3.162278533e-01f * v; + s3 += 6.995949894e-02f * v; + s4 += -3.984701633e-01f * v; + s5 += 3.984702528e-01f * v; + s6 += -6.996008009e-02f * v; + s7 += -3.162274361e-01f * v; + s8 += 4.417077899e-01f * v; + s9 += -2.030310780e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 1.381965876e-01f * v; + s1 += -3.618033826e-01f * v; + s2 += 4.472135901e-01f * v; + s3 += -3.618035913e-01f * v; + s4 += 1.381965429e-01f * v; + s5 += 1.381962299e-01f * v; + s6 += -3.618031442e-01f * v; + s7 += 4.472135901e-01f * v; + s8 += -3.618036509e-01f * v; + s9 += 1.381966770e-01f * v; + } + } + + { + float v = src[9 * src_stride]; + if (v != 0.0f) + { + s0 += 6.995963305e-02f * v; + s1 += -2.030306011e-01f * v; + s2 += 3.162277639e-01f * v; + s3 += -3.984701633e-01f * v; + s4 += 4.417076409e-01f * v; + s5 += -4.417076409e-01f * v; + s6 += 3.984701931e-01f * v; + s7 += -3.162280619e-01f * v; + s8 += 2.030308247e-01f * v; + s9 += -6.995939463e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; + dst[9 * dst_stride] = s9; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 11, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_11( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 
= 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + float s9 = 0.0f; + float s10 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.015113473e-01f * v; + s1 += 3.015113473e-01f * v; + s2 += 3.015113473e-01f * v; + s3 += 3.015113473e-01f * v; + s4 += 3.015113473e-01f * v; + s5 += 3.015113473e-01f * v; + s6 += 3.015113473e-01f * v; + s7 += 3.015113473e-01f * v; + s8 += 3.015113473e-01f * v; + s9 += 3.015113473e-01f * v; + s10 += 3.015113473e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.220612943e-01f * v; + s1 += 3.878683746e-01f * v; + s2 += 3.222526908e-01f * v; + s3 += 2.305300087e-01f * v; + s4 += 1.201311573e-01f * v; + s6 += -1.201311946e-01f * v; + s7 += -2.305300087e-01f * v; + s8 += -3.222527206e-01f * v; + s9 += -3.878683746e-01f * v; + s10 += -4.220612943e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.091291726e-01f * v; + s1 += 2.792335451e-01f * v; + s2 += 6.068321317e-02f * v; + s3 += -1.771336049e-01f * v; + s4 += -3.587117195e-01f * v; + s5 += -4.264014363e-01f * v; + s6 += -3.587116897e-01f * v; + s7 += -1.771335900e-01f * v; + s8 += 6.068333238e-02f * v; + s9 += 2.792335451e-01f * v; + s10 += 4.091292024e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.878683746e-01f * v; + s1 += 1.201311573e-01f * v; + s2 += -2.305300087e-01f * v; + s3 += -4.220612943e-01f * v; + s4 += -3.222526908e-01f * v; + s6 += 3.222527504e-01f * v; + s7 += 4.220612645e-01f * v; + s8 += 2.305298299e-01f * v; + s9 += -1.201310679e-01f * v; + s10 += -3.878685534e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.587117195e-01f * v; + s1 += -6.068325043e-02f * v; + s2 += -4.091292024e-01f * v; + s3 += -2.792334855e-01f * v; + s4 += 1.771336049e-01f * v; + s5 += 4.264014363e-01f * v; + s6 += 1.771334559e-01f * v; + s7 += -2.792335153e-01f * v; + s8 += -4.091291428e-01f * v; + s9 += -6.068325043e-02f * 
v; + s10 += 3.587118387e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.222526908e-01f * v; + s1 += -2.305300087e-01f * v; + s2 += -3.878683448e-01f * v; + s3 += 1.201313213e-01f * v; + s4 += 4.220612645e-01f * v; + s6 += -4.220612943e-01f * v; + s7 += -1.201310530e-01f * v; + s8 += 3.878682852e-01f * v; + s9 += 2.305295914e-01f * v; + s10 += -3.222530484e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.792335451e-01f * v; + s1 += -3.587117195e-01f * v; + s2 += -1.771335900e-01f * v; + s3 += 4.091292024e-01f * v; + s4 += 6.068318710e-02f * v; + s5 += -4.264014363e-01f * v; + s6 += 6.068341061e-02f * v; + s7 += 4.091290832e-01f * v; + s8 += -1.771339774e-01f * v; + s9 += -3.587118387e-01f * v; + s10 += 2.792341411e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 2.305300087e-01f * v; + s1 += -4.220612943e-01f * v; + s2 += 1.201313213e-01f * v; + s3 += 3.222525418e-01f * v; + s4 += -3.878685534e-01f * v; + s6 += 3.878683150e-01f * v; + s7 += -3.222530484e-01f * v; + s8 += -1.201303899e-01f * v; + s9 += 4.220611751e-01f * v; + s10 += -2.305305302e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 1.771335304e-01f * v; + s1 += -4.091291726e-01f * v; + s2 += 3.587118089e-01f * v; + s3 += -6.068347394e-02f * v; + s4 += -2.792334855e-01f * v; + s5 += 4.264014363e-01f * v; + s6 += -2.792337239e-01f * v; + s7 += -6.068337709e-02f * v; + s8 += 3.587115407e-01f * v; + s9 += -4.091291726e-01f * v; + s10 += 1.771339774e-01f * v; + } + } + + { + float v = src[9 * src_stride]; + if (v != 0.0f) + { + s0 += 1.201311573e-01f * v; + s1 += -3.222526908e-01f * v; + s2 += 4.220612645e-01f * v; + s3 += -3.878685534e-01f * v; + s4 += 2.305301726e-01f * v; + s6 += -2.305298299e-01f * v; + s7 += 3.878681958e-01f * v; + s8 += -4.220613837e-01f * v; + s9 += 3.222527504e-01f * v; + s10 += -1.201314703e-01f * v; + } + } + + { + float v = src[10 * 
src_stride]; + if (v != 0.0f) + { + s0 += 6.068321317e-02f * v; + s1 += -1.771335900e-01f * v; + s2 += 2.792334557e-01f * v; + s3 += -3.587115407e-01f * v; + s4 += 4.091290832e-01f * v; + s5 += -4.264014363e-01f * v; + s6 += 4.091292620e-01f * v; + s7 += -3.587118387e-01f * v; + s8 += 2.792330980e-01f * v; + s9 += -1.771344692e-01f * v; + s10 += 6.068423390e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; + dst[9 * dst_stride] = s9; + dst[10 * dst_stride] = s10; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 12, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_12( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + float s9 = 0.0f; + float s10 = 0.0f; + float s11 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 2.886751294e-01f * v; + s1 += 2.886751294e-01f * v; + s2 += 2.886751294e-01f * v; + s3 += 2.886751294e-01f * v; + s4 += 2.886751294e-01f * v; + s5 += 2.886751294e-01f * v; + s6 += 2.886751294e-01f * v; + s7 += 2.886751294e-01f * v; + s8 += 2.886751294e-01f * v; + s9 += 2.886751294e-01f * v; + s10 += 2.886751294e-01f * v; + s11 += 2.886751294e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.047556818e-01f * v; + s1 += 3.771722317e-01f * v; + s2 += 3.238851428e-01f * v; + s3 += 2.485257983e-01f * v; + s4 += 1.562298536e-01f * v; + s5 += 5.328707024e-02f * v; + s6 += -5.328710750e-02f 
* v; + s7 += -1.562298536e-01f * v; + s8 += -2.485258281e-01f * v; + s9 += -3.238851428e-01f * v; + s10 += -3.771722913e-01f * v; + s11 += -4.047556818e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 3.943375647e-01f * v; + s1 += 2.886751294e-01f * v; + s2 += 1.056623980e-01f * v; + s3 += -1.056624874e-01f * v; + s4 += -2.886751294e-01f * v; + s5 += -3.943375945e-01f * v; + s6 += -3.943375647e-01f * v; + s7 += -2.886751592e-01f * v; + s8 += -1.056624129e-01f * v; + s9 += 1.056624204e-01f * v; + s10 += 2.886752486e-01f * v; + s11 += 3.943375647e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.771722317e-01f * v; + s1 += 1.562298536e-01f * v; + s2 += -1.562298536e-01f * v; + s3 += -3.771722913e-01f * v; + s4 += -3.771722317e-01f * v; + s5 += -1.562297344e-01f * v; + s6 += 1.562299281e-01f * v; + s7 += 3.771722615e-01f * v; + s8 += 3.771722019e-01f * v; + s9 += 1.562297940e-01f * v; + s10 += -1.562300622e-01f * v; + s11 += -3.771722317e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.535533845e-01f * v; + s2 += -3.535534441e-01f * v; + s3 += -3.535533547e-01f * v; + s5 += 3.535534739e-01f * v; + s6 += 3.535533845e-01f * v; + s8 += -3.535534143e-01f * v; + s9 += -3.535534143e-01f * v; + s11 += 3.535532951e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.238851428e-01f * v; + s1 += -1.562298536e-01f * v; + s2 += -4.047556818e-01f * v; + s3 += -5.328698456e-02f * v; + s4 += 3.771722615e-01f * v; + s5 += 2.485257536e-01f * v; + s6 += -2.485258281e-01f * v; + s7 += -3.771722317e-01f * v; + s8 += 5.328687653e-02f * v; + s9 += 4.047557116e-01f * v; + s10 += 1.562295407e-01f * v; + s11 += -3.238854110e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.886751294e-01f * v; + s1 += -2.886751294e-01f * v; + s2 += -2.886751592e-01f * v; + s3 += 2.886752486e-01f * v; + s4 += 2.886749804e-01f * 
v; + s5 += -2.886753380e-01f * v; + s6 += -2.886750400e-01f * v; + s7 += 2.886751592e-01f * v; + s8 += 2.886749506e-01f * v; + s9 += -2.886752486e-01f * v; + s10 += -2.886748612e-01f * v; + s11 += 2.886750698e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 2.485257983e-01f * v; + s1 += -3.771722913e-01f * v; + s2 += -5.328698456e-02f * v; + s3 += 4.047556818e-01f * v; + s4 += -1.562300622e-01f * v; + s5 += -3.238852322e-01f * v; + s6 += 3.238853514e-01f * v; + s7 += 1.562295407e-01f * v; + s8 += -4.047557414e-01f * v; + s9 += 5.328752100e-02f * v; + s10 += 3.771720827e-01f * v; + s11 += -2.485256344e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 2.041241378e-01f * v; + s1 += -4.082483053e-01f * v; + s2 += 2.041243017e-01f * v; + s3 += 2.041239887e-01f * v; + s4 += -4.082483053e-01f * v; + s5 += 2.041243464e-01f * v; + s6 += 2.041241080e-01f * v; + s7 += -4.082483053e-01f * v; + s8 += 2.041242570e-01f * v; + s9 += 2.041241974e-01f * v; + s10 += -4.082483053e-01f * v; + s11 += 2.041237950e-01f * v; + } + } + + { + float v = src[9 * src_stride]; + if (v != 0.0f) + { + s0 += 1.562298536e-01f * v; + s1 += -3.771722317e-01f * v; + s2 += 3.771722615e-01f * v; + s3 += -1.562300622e-01f * v; + s4 += -1.562296748e-01f * v; + s5 += 3.771723211e-01f * v; + s6 += -3.771723509e-01f * v; + s7 += 1.562300622e-01f * v; + s8 += 1.562293023e-01f * v; + s9 += -3.771721721e-01f * v; + s10 += 3.771724999e-01f * v; + s11 += -1.562300622e-01f * v; + } + } + + { + float v = src[10 * src_stride]; + if (v != 0.0f) + { + s0 += 1.056623980e-01f * v; + s1 += -2.886751592e-01f * v; + s2 += 3.943375647e-01f * v; + s3 += -3.943376541e-01f * v; + s4 += 2.886751592e-01f * v; + s5 += -1.056626216e-01f * v; + s6 += -1.056624576e-01f * v; + s7 += 2.886750400e-01f * v; + s8 += -3.943376839e-01f * v; + s9 += 3.943377137e-01f * v; + s10 += -2.886756361e-01f * v; + s11 += 1.056632623e-01f * v; + } + } + + { + float v = src[11 * 
src_stride]; + if (v != 0.0f) + { + s0 += 5.328707024e-02f * v; + s1 += -1.562297344e-01f * v; + s2 += 2.485257536e-01f * v; + s3 += -3.238852322e-01f * v; + s4 += 3.771723211e-01f * v; + s5 += -4.047556818e-01f * v; + s6 += 4.047556818e-01f * v; + s7 += -3.771722913e-01f * v; + s8 += 3.238852024e-01f * v; + s9 += -2.485264540e-01f * v; + s10 += 1.562305540e-01f * v; + s11 += -5.328702182e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; + dst[9 * dst_stride] = s9; + dst[10 * dst_stride] = s10; + dst[11 * dst_stride] = s11; +} diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder.h b/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder.h new file mode 100644 index 000000000..5c4f463ee --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder.h @@ -0,0 +1,1344 @@ +// basisu_transcoder.h +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also see basis_tex_format in basisu_file_headers.h (TODO: Perhaps move key definitions into here.) +#pragma once + +// By default KTX2 support is enabled to simplify compilation. 
This implies the need for the Zstandard library (which we distribute as a single source file in the "zstd" directory) by default. +// Set BASISD_SUPPORT_KTX2 to 0 to completely disable KTX2 support as well as Zstd/miniz usage which is only required for UASTC supercompression in KTX2 files. +// Also see BASISD_SUPPORT_KTX2_ZSTD in basisu_transcoder.cpp, which individually disables Zstd usage. +#ifndef BASISD_SUPPORT_KTX2 + #define BASISD_SUPPORT_KTX2 1 +#endif + +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #define BASISD_SUPPORT_KTX2_ZSTD 0 +#endif + +#include "basisu_transcoder_internal.h" +#include "basisu_transcoder_uastc.h" +#include "basisu_file_headers.h" + +namespace basist +{ + // High-level composite texture formats supported by the transcoder. + // Each of these texture formats directly correspond to OpenGL/D3D/Vulkan etc. texture formats. + // Notes: + // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a + // fully opaque (255) alpha channel. + // - The PVRTC1 texture formats only support power of 2 dimension .basis files, but this may be relaxed in a future version. + // - The PVRTC1 transcoders are real-time encoders, so don't expect the highest quality. We may add a slower encoder with improved quality. + // - These enums must be kept in sync with Javascript code that calls the transcoder. 
+ enum class transcoder_texture_format + { + // Compressed formats + + // ETC1-2 + cTFETC1_RGB = 0, // Opaque only, returns RGB or alpha data if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified + cTFETC2_RGBA = 1, // Opaque+alpha, ETC2_EAC_A8 block followed by a ETC1 block, alpha channel will be opaque for opaque .basis files + + // BC1-5, BC7 (desktop, some mobile devices) + cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified + cTFBC3_RGBA = 3, // Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files + cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified + cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's) + cTFBC7_RGBA = 6, // RGB or RGBA, mode 5 for ETC1S, modes (1,2,3,5,6,7) for UASTC + + // PVRTC1 4bpp (mobile, PowerVR devices) + cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format. + cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. + + // ASTC (mobile, some Intel CPU's, hopefully all desktop GPU's one day) + cTFASTC_LDR_4x4_RGBA = 10, // LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. + // LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions. + + // ATC (mobile, Adreno devices, this is a niche format) + cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. 
ATI ATC (GL_ATC_RGB_AMD) + cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) + + // FXT1 (desktop, Intel devices, this is a super obscure format) + cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). + // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now. + // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h. + + cTFPVRTC2_4_RGB = 18, // Opaque-only, almost BC1 quality, much faster to transcode and supports arbitrary texture dimensions (unlike PVRTC1 RGB). + cTFPVRTC2_4_RGBA = 19, // Opaque+alpha, slower to encode than cTFPVRTC2_4_RGB. Premultiplied alpha is highly recommended, otherwise the color channel can leak into the alpha channel on transparent blocks. + + cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned) + cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps + + cTFBC6H = 22, // HDR, RGB only, unsigned + cTFASTC_HDR_4x4_RGBA = 23, // HDR, RGBA (currently UASTC HDR 4x4 encoders are only RGB), unsigned + + // Uncompressed (raw pixel) formats + // Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR). + cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte. 
+ cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11 + cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0 + cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0 + + // Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR 4x4 or ASTC HDR 6x6). + cTFRGB_HALF = 24, // 48bpp RGB half (16-bits/component, 3 components) + cTFRGBA_HALF = 25, // 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha) + cTFRGB_9E5 = 26, // 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent) + + cTFASTC_HDR_6x6_RGBA = 27, // HDR, RGBA (currently our ASTC HDR 6x6 encodes are only RGB), unsigned + + + // The remaining LDR ASTC block sizes, excluding 4x4 (which is above). There are 14 total valid ASTC LDR/HDR block sizes. + cTFASTC_LDR_5x4_RGBA = 28, + cTFASTC_LDR_5x5_RGBA = 29, + cTFASTC_LDR_6x5_RGBA = 30, + cTFASTC_LDR_6x6_RGBA = 31, + cTFASTC_LDR_8x5_RGBA = 32, + cTFASTC_LDR_8x6_RGBA = 33, + cTFASTC_LDR_10x5_RGBA = 34, + cTFASTC_LDR_10x6_RGBA = 35, + cTFASTC_LDR_8x8_RGBA = 36, + cTFASTC_LDR_10x8_RGBA = 37, + cTFASTC_LDR_10x10_RGBA = 38, + cTFASTC_LDR_12x10_RGBA = 39, + cTFASTC_LDR_12x12_RGBA = 40, + + cTFTotalTextureFormats = 41, + + // ----- The following are old/legacy enums for compatibility with code compiled against previous versions + cTFETC1 = cTFETC1_RGB, + cTFETC2 = cTFETC2_RGBA, + cTFBC1 = cTFBC1_RGB, + cTFBC3 = cTFBC3_RGBA, + cTFBC4 = cTFBC4_R, + cTFBC5 = cTFBC5_RG, + + // Previously, the caller had some control over which BC7 mode the transcoder output. We've simplified this due to UASTC LDR 4x4, which supports numerous modes. + cTFBC7_M6_RGB = cTFBC7_RGBA, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. 
Highest quality of all the non-ETC1 formats. + cTFBC7_M5_RGBA = cTFBC7_RGBA, // Opaque+alpha, alpha channel will be opaque for opaque .basis files + cTFBC7_M6_OPAQUE_ONLY = cTFBC7_RGBA, + cTFBC7_M5 = cTFBC7_RGBA, + cTFBC7_ALT = 7, + + cTFASTC_4x4 = cTFASTC_LDR_4x4_RGBA, + + cTFATC_RGBA_INTERPOLATED_ALPHA = cTFATC_RGBA, + + cTFASTC_4x4_RGBA = cTFASTC_LDR_4x4_RGBA + }; + + // For compressed texture formats, this returns the # of bytes per block. For uncompressed, it returns the # of bytes per pixel. + // NOTE: Previously, this function was called basis_get_bytes_per_block(), and it always returned 16*bytes_per_pixel for uncompressed formats which was confusing. + uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt); + + // Returns the transcoder texture format's name in ASCII + const char* basis_get_format_name(transcoder_texture_format fmt); + + // Returns basis texture format name in ASCII + const char* basis_get_tex_format_name(basis_tex_format fmt); + + // Returns block format name in ASCII + const char* basis_get_block_format_name(block_format fmt); + + // Returns true if the format supports an alpha channel. + bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt); + + // Returns true if the format is HDR. + bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt); + + // Returns true if the format is LDR. + inline bool basis_transcoder_format_is_ldr(transcoder_texture_format fmt) { return !basis_transcoder_format_is_hdr(fmt); } + + // Returns true if the format is an LDR or HDR ASTC format. + bool basis_is_transcoder_texture_format_astc(transcoder_texture_format fmt); + + // Returns the basisu::texture_format corresponding to the specified transcoder_texture_format. + basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt); + + // Returns the texture type's name in ASCII. 
+ const char* basis_get_texture_type_name(basis_texture_type tex_type); + + // Returns true if the transcoder texture type is an uncompressed (raw pixel) format. + bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type); + + // Returns the # of bytes per pixel for uncompressed formats, or 0 for block texture formats. + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt); + + // Returns the block width for the specified texture format, which is currently either 4 or 8 for FXT1. + uint32_t basis_get_block_width(transcoder_texture_format fmt); + + // Returns the block height for the specified texture format, which is currently always 4. + uint32_t basis_get_block_height(transcoder_texture_format fmt); + + // ASTC/XUASTC LDR formats only: Given a basis_tex_format (mode or codec), return the corresponding ASTC basisu::texture_format with the proper block size from 4x4-12x12. + basisu::texture_format basis_get_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(basis_tex_format fmt); + + // For any given basis_tex_format (mode or codec), return the LDR/HDR ASTC transcoder texture format with the proper block size. + transcoder_texture_format basis_get_transcoder_texture_format_from_basis_tex_format(basis_tex_format fmt); + // basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format: same as basis_get_transcoder_texture_format_from_basis_tex_format (TODO: remove) + transcoder_texture_format basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(basis_tex_format fmt); + + // Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR, or XUASTC LDR 4x4-12x12). + // For XUASTC the ASTC block size must match the transcoder_texture_format's ASTC block size. 
+ bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S); + + // Returns the block width/height for the specified basis texture file format. + uint32_t basis_tex_format_get_block_width(basis_tex_format fmt); + uint32_t basis_tex_format_get_block_height(basis_tex_format fmt); + + bool basis_tex_format_is_hdr(basis_tex_format fmt); + inline bool basis_tex_format_is_ldr(basis_tex_format fmt) { return !basis_tex_format_is_hdr(fmt); } + + // Validates that the output buffer is large enough to hold the entire transcoded texture. + // For uncompressed texture formats, most input parameters are in pixels, not blocks. + bool basis_validate_output_buffer_size(transcoder_texture_format target_format, + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t orig_width, uint32_t orig_height, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels); + + // Computes the size in bytes of a transcoded image or texture, taking into account the format's block width/height and any minimum size PVRTC1 requirements required by OpenGL. + // Note the returned value is not necessarily the # of bytes a transcoder could write to the output buffer due to these minimum PVRTC1 requirements. + // (These PVRTC1 requirements are not ours, but OpenGL's.) + uint32_t basis_compute_transcoded_image_size_in_bytes(transcoder_texture_format target_format, uint32_t orig_width, uint32_t orig_height); + + class basisu_transcoder; + + // This struct holds all state used during transcoding. For video, it needs to persist between image transcodes (it holds the previous frame). + // For threading you can use one state per thread. 
+ struct basisu_transcoder_state + { + struct block_preds + { + uint16_t m_endpoint_index; + uint8_t m_pred_bits; + }; + + basisu::vector m_block_endpoint_preds[2]; + + enum { cMaxPrevFrameLevels = 16 }; + basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + + void clear() + { + for (uint32_t i = 0; i < 2; i++) + { + m_block_endpoint_preds[i].clear(); + + for (uint32_t j = 0; j < cMaxPrevFrameLevels; j++) + m_prev_frame_indices[i][j].clear(); + } + } + }; + + // Low-level helper classes that do the actual transcoding. + enum basisu_decode_flags + { + // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. + cDecodeFlagsPVRTCDecodeToNextPow2 = 2, + + // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. + // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). + cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, + + // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). + // This flag is used internally when decoding to BC3. + cDecodeFlagsBC1ForbidThreeColorBlocks = 8, + + // The output buffer contains alpha endpoint/selector indices. + // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. + cDecodeFlagsOutputHasAlphaIndices = 16, + + // Enable slower, but higher quality transcoding for some formats. + // For ASTC/XUASTC->BC7, this enables partially analytical encoding vs. fully analytical. + cDecodeFlagsHighQuality = 32, + + // Disable ETC1S->BC7 adaptive chroma filtering, for much faster transcoding to BC7. 
+ cDecodeFlagsNoETC1SChromaFiltering = 64, + + // Disable deblock filtering for XUASTC LDR transcoding to non-ASTC formats. + // For ASTC 8x6 or smaller block sizes, deblocking is always disabled unless you force it on using cDecodeFlagsForceDeblockFiltering. + cDecodeFlagsNoDeblockFiltering = 128, + + // More aggressive deblock filtering (only used when it's enabled) + cDecodeFlagsStrongerDeblockFiltering = 256, + + // Always apply deblocking, even for smaller ASTC block sizes (4x4-8x6). + cDecodeFlagsForceDeblockFiltering = 512, + + // By default XUASTC LDR 4x4, 6x6 and 8x6 are directly transcoded to BC7 without always requiring a full ASTC block unpack and analytical BC7 encode. This is 1.4x up to 3x faster in WASM. + // This trade offs some quality. The largest transcoding speed gain is achieved when the source XUASTC data isn't dual plane and only uses 1 subset. Otherwise the actual perf. gain is variable. + // To disable this optimization for all XUASTC block sizes and always use the fallback encoder, specify cDecodeFlagXUASTCLDRDisableFastBC7Transcoding. 
+ cDecodeFlagXUASTCLDRDisableFastBC7Transcoding = 1024 + }; + + // ETC1S + class basisu_lowlevel_etc1s_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_etc1s_transcoder(); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_pGlobal_codebook = pGlobal_codebook; } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_pGlobal_codebook; } + + bool decode_palettes( + uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size, + uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size); + + bool decode_tables(const uint8_t* pTable_data, uint32_t table_data_size); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, 
output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, + header.m_tex_type == cBASISTexTypeVideoFrames, (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0, slice_desc.m_level_index, + slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, pState, + astc_transcode_alpha, + pAlpha_blocks, + output_rows_in_pixels, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t rgb_offset, uint32_t rgb_length, uint64_t alpha_offset, uint32_t alpha_length, + uint32_t decode_flags = 0, + bool basis_file_has_alpha_slices = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0); + + void clear() + { + m_local_endpoints.clear(); + m_local_selectors.clear(); + m_endpoint_pred_model.clear(); + m_delta_endpoint_model.clear(); + m_selector_model.clear(); + m_selector_history_buf_rle_model.clear(); + m_selector_history_buf_size = 0; + } + + // Low-level methods + typedef basisu::vector endpoint_vec; + const endpoint_vec& get_endpoints() const { return m_local_endpoints; } + + typedef basisu::vector selector_vec; + const selector_vec& get_selectors() const { return m_local_selectors; } + + private: + const basisu_lowlevel_etc1s_transcoder* m_pGlobal_codebook; + + endpoint_vec m_local_endpoints; + selector_vec m_local_selectors; + + huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; + + uint32_t m_selector_history_buf_size; + + basisu_transcoder_state m_def_state; + }; + + // UASTC LDR 4x4 + class 
basisu_lowlevel_uastc_ldr_4x4_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_uastc_ldr_4x4_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, 
+ bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + +#if BASISD_SUPPORT_XUASTC + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + struct xuastc_decoded_image + { + uint32_t m_actual_block_width = 0, m_actual_block_height = 0, m_actual_width = 0, m_actual_height = 0; + bool m_actual_has_alpha = false, m_uses_srgb_astc_decode_mode = false; + + bool decode(const uint8_t* pImage_data, uint32_t image_data_size, + astc_ldr_t::xuastc_decomp_image_init_callback_ptr pInit_callback, void* pInit_callback_data, + astc_ldr_t::xuastc_decomp_image_block_callback_ptr pBlock_callback, void* pBlock_callback_data) + { + const bool decomp_flag = astc_ldr_t::xuastc_ldr_decompress_image(pImage_data, image_data_size, + m_actual_block_width, m_actual_block_height, + m_actual_width, m_actual_height, + m_actual_has_alpha, m_uses_srgb_astc_decode_mode, basisu::g_debug_printf, + pInit_callback, pInit_callback_data, + pBlock_callback, pBlock_callback_data); + + return decomp_flag; + } + + void clear() + { + m_actual_block_width = 0; + m_actual_block_height = 0; + m_actual_width = 0; + m_actual_height = 0; + m_actual_has_alpha = false; + m_uses_srgb_astc_decode_mode = false; + } + }; +#endif + + class basisu_lowlevel_xuastc_ldr_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_xuastc_ldr_transcoder(); + + bool transcode_slice(basis_tex_format src_format, bool use_astc_srgb_decode_profile, void* pDst_blocks, uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t 
output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(basis_tex_format src_format, bool use_astc_srgb_decode_profile, void* pDst_blocks, uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(src_format, use_astc_srgb_decode_profile, pDst_blocks, src_num_blocks_x, src_num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + basis_tex_format src_format, bool use_astc_srgb_decode_profile, + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + // UASTC HDR 4x4 + class basisu_lowlevel_uastc_hdr_4x4_transcoder + { + friend class basisu_transcoder; + + public: + 
basisu_lowlevel_uastc_hdr_4x4_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t 
output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + // ASTC HDR 6x6 + class basisu_lowlevel_astc_hdr_6x6_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_astc_hdr_6x6_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t 
compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + // UASTC HDR 6x6 intermediate + class basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, 
+ pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + struct basisu_slice_info + { + // The image's ACTUAL dimensions in texels. + uint32_t m_orig_width; + uint32_t m_orig_height; + + // The texture's dimensions in texels - always a multiple of the texture's underlying block size (4x4-12x12). + uint32_t m_width; + uint32_t m_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + uint32_t m_total_blocks; + + uint32_t m_block_width; + uint32_t m_block_height; + + uint32_t m_compressed_size; + + uint32_t m_slice_index; // the slice index in the .basis file + uint32_t m_image_index; // the source image index originally provided to the encoder + uint32_t m_level_index; // the mipmap level within this image + + uint32_t m_unpacked_slice_crc16; + + bool m_alpha_flag; // true if the slice has alpha data + bool m_iframe_flag; // true if the slice is an I-Frame + }; + + typedef basisu::vector basisu_slice_info_vec; + + struct basisu_image_info + { + uint32_t m_image_index; + uint32_t m_total_levels; + + // The image's ACTUAL dimensions in texels. + uint32_t m_orig_width; + uint32_t m_orig_height; + + // The texture's dimensions in texels - always a multiple of the texture's underlying block size (4x4-12x12). 
+ uint32_t m_width; + uint32_t m_height; + + uint32_t m_block_width; + uint32_t m_block_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + uint32_t m_total_blocks; + + uint32_t m_first_slice_index; + + bool m_alpha_flag; // true if the image has alpha data + bool m_iframe_flag; // true if the image is an I-Frame + }; + + struct basisu_image_level_info + { + uint32_t m_image_index; + uint32_t m_level_index; + + uint32_t m_orig_width; + uint32_t m_orig_height; + + uint32_t m_width; + uint32_t m_height; + + uint32_t m_block_width; + uint32_t m_block_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + uint32_t m_total_blocks; + + uint32_t m_first_slice_index; + + uint32_t m_rgb_file_ofs; + uint32_t m_rgb_file_len; + uint32_t m_alpha_file_ofs; + uint32_t m_alpha_file_len; + + bool m_alpha_flag; // true if the image has alpha data + bool m_iframe_flag; // true if the image is an I-Frame + }; + + struct basisu_file_info + { + uint32_t m_version; + uint32_t m_total_header_size; + + uint32_t m_total_selectors; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t m_selector_codebook_ofs; + uint32_t m_selector_codebook_size; + + uint32_t m_total_endpoints; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t m_endpoint_codebook_ofs; + uint32_t m_endpoint_codebook_size; + + uint32_t m_tables_ofs; + uint32_t m_tables_size; + + uint32_t m_slices_size; + + basis_texture_type m_tex_type; + uint32_t m_us_per_frame; + + // Low-level slice information (1 slice per image for color-only basis files, 2 for alpha basis files) + basisu_slice_info_vec m_slice_info; + + uint32_t m_total_images; // total # of images + basisu::vector m_image_mipmap_levels; // the # of mipmap levels for each image + + uint32_t m_userdata0; + uint32_t m_userdata1; + + basis_tex_format m_tex_format; // ETC1S, UASTC, etc. 
+ + uint32_t m_block_width; + uint32_t m_block_height; + + bool m_y_flipped; // true if the image was Y flipped + bool m_srgb; // true if the image is sRGB, false if linear + bool m_etc1s; // true if the file is ETC1S + bool m_has_alpha_slices; // true if the texture has alpha slices (for ETC1S: even slices RGB, odd slices alpha) + }; + + // High-level transcoder class which accepts .basis file data and allows the caller to query information about the file and transcode image levels to various texture formats. + // If you're just starting out this is the class you care about (or see the KTX2 transcoder below). + class basisu_transcoder + { + basisu_transcoder(basisu_transcoder&); + basisu_transcoder& operator= (const basisu_transcoder&); + + public: + basisu_transcoder(); + + // Validates the .basis file. This computes a crc16 over the entire file, so it's slow. + bool validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const; + + // Quick header validation - no crc16 checks. + bool validate_header(const void* pData, uint32_t data_size) const; + + basis_texture_type get_texture_type(const void* pData, uint32_t data_size) const; + bool get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const; + + // Returns the total number of images in the basis file (always 1 or more). + // Note that the number of mipmap levels for each image may differ, and that images may have different resolutions. + uint32_t get_total_images(const void* pData, uint32_t data_size) const; + + basis_tex_format get_basis_tex_format(const void* pData, uint32_t data_size) const; + + // Returns the number of mipmap levels in an image. + uint32_t get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const; + + // Returns basic information about an image. Note that orig_width/orig_height may not be a multiple of 4. 
+ bool get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const; + + // Returns information about the specified image. + bool get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const; + + // Returns information about the specified image's mipmap level. + bool get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& level_info, uint32_t image_index, uint32_t level_index) const; + + // Get a description of the basis file and low-level information about each slice. + bool get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const; + + // start_transcoding() must be called before calling transcode_slice() or transcode_image_level(). + // For ETC1S files, this call decompresses the selector/endpoint codebooks, so ideally you would only call this once per .basis file (not each image/mipmap level). + bool start_transcoding(const void* pData, uint32_t data_size); + + bool stop_transcoding(); + + // Returns true if start_transcoding() has been called. + bool get_ready_to_transcode() const { return m_ready_to_transcode; } + + // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. + // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). + // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). + // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. 
+ // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32 etc. + // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). + // output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). + // Notes: + // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. + // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in + // a first pass, which will be read in a second pass. + bool transcode_image_level( + const void* pData, uint32_t data_size, + uint32_t image_index, uint32_t level_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + transcoder_texture_format fmt, + uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0) const; + + // Finds the basis slice corresponding to the specified image/level/alpha params, or -1 if the slice can't be found. + int find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const; + + // transcode_slice() decodes a single slice from the .basis file. It's a low-level API - most likely you want to use transcode_image_level(). + // This is a low-level API, and will be needed to be called multiple times to decode some texture formats (like BC3, BC5, or ETC2). 
+ // output_blocks_buf_size_in_blocks_or_pixels is just used for verification to make sure the output buffer is large enough. + // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. + // output_block_stride_in_bytes: Number of bytes between each output block. + // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). + // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). + // Notes: + // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. + bool transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, void* pAlpha_blocks = nullptr, + uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1) const; + + static void write_opaque_alpha_blocks( + uint32_t num_blocks_x, uint32_t num_blocks_y, + void* pOutput_blocks, block_format fmt, + uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_lowlevel_etc1s_decoder.set_global_codebooks(pGlobal_codebook); } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_lowlevel_etc1s_decoder.get_global_codebooks(); } + + const basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() const { return 
m_lowlevel_etc1s_decoder; } + basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() { return m_lowlevel_etc1s_decoder; } + + const basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_ldr_4x4_decoder; } + basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_ldr_4x4_decoder; } + + private: + mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder; + mutable basisu_lowlevel_uastc_ldr_4x4_transcoder m_lowlevel_uastc_ldr_4x4_decoder; + mutable basisu_lowlevel_xuastc_ldr_transcoder m_lowlevel_xuastc_ldr_decoder; + mutable basisu_lowlevel_uastc_hdr_4x4_transcoder m_lowlevel_uastc_4x4_hdr_decoder; + mutable basisu_lowlevel_astc_hdr_6x6_transcoder m_lowlevel_astc_6x6_hdr_decoder; + mutable basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder m_lowlevel_astc_6x6_hdr_intermediate_decoder; + + bool m_ready_to_transcode; + + int find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const; + + bool validate_header_quick(const void* pData, uint32_t data_size) const; + }; + + // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. + void basisu_transcoder_init(); + + enum debug_flags_t + { + cDebugFlagVisCRs = 1, + cDebugFlagVisBC1Sels = 2, + cDebugFlagVisBC1Endpoints = 4 + }; + uint32_t get_debug_flags(); + void set_debug_flags(uint32_t f); + + // ------------------------------------------------------------------------------------------------------ + // Optional .KTX2 file format support + // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. 
// ------------------------------------------------------------------------------------------------------

#if BASISD_SUPPORT_KTX2

#pragma pack(push)
#pragma pack(1)
// Raw KTX2 file header, byte-packed exactly as stored on disk.
struct ktx2_header
{
    uint8_t m_identifier[12];
    basisu::packed_uint<4> m_vk_format;
    basisu::packed_uint<4> m_type_size;
    basisu::packed_uint<4> m_pixel_width;
    basisu::packed_uint<4> m_pixel_height;
    basisu::packed_uint<4> m_pixel_depth;
    basisu::packed_uint<4> m_layer_count;
    basisu::packed_uint<4> m_face_count;
    basisu::packed_uint<4> m_level_count;
    basisu::packed_uint<4> m_supercompression_scheme;
    basisu::packed_uint<4> m_dfd_byte_offset;
    basisu::packed_uint<4> m_dfd_byte_length;
    basisu::packed_uint<4> m_kvd_byte_offset;
    basisu::packed_uint<4> m_kvd_byte_length;
    basisu::packed_uint<8> m_sgd_byte_offset;
    basisu::packed_uint<8> m_sgd_byte_length;
};

// One entry per mipmap level in the KTX2 level index array.
struct ktx2_level_index
{
    basisu::packed_uint<8> m_byte_offset;
    basisu::packed_uint<8> m_byte_length;
    basisu::packed_uint<8> m_uncompressed_byte_length;
};

// Header of the ETC1S/BasisLZ supercompression global data.
struct ktx2_etc1s_global_data_header
{
    basisu::packed_uint<2> m_endpoint_count;
    basisu::packed_uint<2> m_selector_count;
    basisu::packed_uint<4> m_endpoints_byte_length;
    basisu::packed_uint<4> m_selectors_byte_length;
    basisu::packed_uint<4> m_tables_byte_length;
    basisu::packed_uint<4> m_extended_byte_length;
};

// Per-image descriptor within the ETC1S supercompression global data.
struct ktx2_etc1s_image_desc
{
    basisu::packed_uint<4> m_image_flags;
    basisu::packed_uint<4> m_rgb_slice_byte_offset;
    basisu::packed_uint<4> m_rgb_slice_byte_length;
    basisu::packed_uint<4> m_alpha_slice_byte_offset;
    basisu::packed_uint<4> m_alpha_slice_byte_length;
};

struct ktx2_slice_offset_len_desc
{
    basisu::packed_uint<4> m_slice_byte_offset;
    basisu::packed_uint<4> m_slice_byte_length;
};

// Contents of the "KTXanimData" key value field, for texture video.
struct ktx2_animdata
{
    basisu::packed_uint<4> m_duration;
    basisu::packed_uint<4> m_timescale;
    basisu::packed_uint<4> m_loopcount;
};
#pragma pack(pop)

const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0;

// These are standard Vulkan texture VkFormat ID's, see https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkFormat.html
const uint32_t KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK = 1000066000;
const uint32_t KTX2_FORMAT_ASTC_5x4_SFLOAT_BLOCK = 1000066001;
const uint32_t KTX2_FORMAT_ASTC_5x5_SFLOAT_BLOCK = 1000066002;
const uint32_t KTX2_FORMAT_ASTC_6x5_SFLOAT_BLOCK = 1000066003;
const uint32_t KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK = 1000066004;
const uint32_t KTX2_FORMAT_ASTC_8x5_SFLOAT_BLOCK = 1000066005;
const uint32_t KTX2_FORMAT_ASTC_8x6_SFLOAT_BLOCK = 1000066006;

const uint32_t KTX2_FORMAT_ASTC_4x4_UNORM_BLOCK = 157, KTX2_FORMAT_ASTC_4x4_SRGB_BLOCK = 158;
const uint32_t KTX2_FORMAT_ASTC_5x4_UNORM_BLOCK = 159, KTX2_FORMAT_ASTC_5x4_SRGB_BLOCK = 160;
const uint32_t KTX2_FORMAT_ASTC_5x5_UNORM_BLOCK = 161, KTX2_FORMAT_ASTC_5x5_SRGB_BLOCK = 162;
const uint32_t KTX2_FORMAT_ASTC_6x5_UNORM_BLOCK = 163, KTX2_FORMAT_ASTC_6x5_SRGB_BLOCK = 164;
const uint32_t KTX2_FORMAT_ASTC_6x6_UNORM_BLOCK = 165, KTX2_FORMAT_ASTC_6x6_SRGB_BLOCK = 166;
const uint32_t KTX2_FORMAT_ASTC_8x5_UNORM_BLOCK = 167, KTX2_FORMAT_ASTC_8x5_SRGB_BLOCK = 168;
const uint32_t KTX2_FORMAT_ASTC_8x6_UNORM_BLOCK = 169, KTX2_FORMAT_ASTC_8x6_SRGB_BLOCK = 170;
const uint32_t KTX2_FORMAT_ASTC_10x5_UNORM_BLOCK = 173, KTX2_FORMAT_ASTC_10x5_SRGB_BLOCK = 174;
const uint32_t KTX2_FORMAT_ASTC_10x6_UNORM_BLOCK = 175, KTX2_FORMAT_ASTC_10x6_SRGB_BLOCK = 176;
const uint32_t KTX2_FORMAT_ASTC_8x8_UNORM_BLOCK = 171, KTX2_FORMAT_ASTC_8x8_SRGB_BLOCK = 172; // note the ASTC block size order is off in the vkFormat definitions
const uint32_t KTX2_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, KTX2_FORMAT_ASTC_10x8_SRGB_BLOCK = 178;
const uint32_t KTX2_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, KTX2_FORMAT_ASTC_10x10_SRGB_BLOCK = 180;
const uint32_t KTX2_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, KTX2_FORMAT_ASTC_12x10_SRGB_BLOCK = 182;
const uint32_t KTX2_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, KTX2_FORMAT_ASTC_12x12_SRGB_BLOCK = 184;

const uint32_t KTX2_KDF_DF_MODEL_ASTC = 162; // 0xA2
const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163; // 0xA3
const uint32_t KTX2_KDF_DF_MODEL_UASTC_LDR_4X4 = 166; // 0xA6
const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR_4X4 = 167; // 0xA7
const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE = 168; // 0xA8, TODO - coordinate with Khronos on this
const uint32_t KTX2_KDF_DF_MODEL_XUASTC_LDR_INTERMEDIATE = 169; // 0xA9, TODO - coordinate with Khronos on this

const uint32_t KTX2_IMAGE_IS_P_FRAME = 2;
const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR
const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased

// The KTX2 transfer functions supported by KTX2
const uint32_t KTX2_KHR_DF_TRANSFER_LINEAR = 1;
const uint32_t KTX2_KHR_DF_TRANSFER_SRGB = 2;

enum ktx2_supercompression
{
    KTX2_SS_NONE = 0,
    KTX2_SS_BASISLZ = 1,
    KTX2_SS_ZSTANDARD = 2,
    KTX2_SS_BASIS
};

extern const uint8_t g_ktx2_file_identifier[12];

enum ktx2_df_channel_id
{
    KTX2_DF_CHANNEL_ETC1S_RGB = 0U,
    KTX2_DF_CHANNEL_ETC1S_RRR = 3U,
    KTX2_DF_CHANNEL_ETC1S_GGG = 4U,
    KTX2_DF_CHANNEL_ETC1S_AAA = 15U,

    KTX2_DF_CHANNEL_UASTC_DATA = 0U,
    KTX2_DF_CHANNEL_UASTC_RGB = 0U,
    KTX2_DF_CHANNEL_UASTC_RGBA = 3U,
    KTX2_DF_CHANNEL_UASTC_RRR = 4U,
    KTX2_DF_CHANNEL_UASTC_RRRG = 5U,
    KTX2_DF_CHANNEL_UASTC_RG = 6U,
};

// Returns a human readable string for an ETC1S DFD channel id (for debugging/logging).
inline const char* ktx2_get_etc1s_df_channel_id_str(ktx2_df_channel_id id)
{
    switch (id)
    {
    case KTX2_DF_CHANNEL_ETC1S_RGB: return "RGB";
    case KTX2_DF_CHANNEL_ETC1S_RRR: return "RRR";
    case KTX2_DF_CHANNEL_ETC1S_GGG: return "GGG";
    case KTX2_DF_CHANNEL_ETC1S_AAA: return "AAA";
    default: break;
    }
    return "?";
}

// Returns a human readable string for a UASTC DFD channel id (for debugging/logging).
inline const char* ktx2_get_uastc_df_channel_id_str(ktx2_df_channel_id id)
{
    switch (id)
    {
    case KTX2_DF_CHANNEL_UASTC_RGB: return "RGB";
    case KTX2_DF_CHANNEL_UASTC_RGBA: return "RGBA";
    case KTX2_DF_CHANNEL_UASTC_RRR: return "RRR";
    case KTX2_DF_CHANNEL_UASTC_RRRG: return "RRRG";
    case KTX2_DF_CHANNEL_UASTC_RG: return "RG";
    default: break;
    }
    return "?";
}

enum ktx2_df_color_primaries
{
    KTX2_DF_PRIMARIES_UNSPECIFIED = 0,
    KTX2_DF_PRIMARIES_BT709 = 1,
    KTX2_DF_PRIMARIES_SRGB = 1,
    KTX2_DF_PRIMARIES_BT601_EBU = 2,
    KTX2_DF_PRIMARIES_BT601_SMPTE = 3,
    KTX2_DF_PRIMARIES_BT2020 = 4,
    KTX2_DF_PRIMARIES_CIEXYZ = 5,
    KTX2_DF_PRIMARIES_ACES = 6,
    KTX2_DF_PRIMARIES_ACESCC = 7,
    KTX2_DF_PRIMARIES_NTSC1953 = 8,
    KTX2_DF_PRIMARIES_PAL525 = 9,
    KTX2_DF_PRIMARIES_DISPLAYP3 = 10,
    KTX2_DF_PRIMARIES_ADOBERGB = 11
};

// Returns a human readable string for a DFD color primaries value (for debugging/logging).
inline const char* ktx2_get_df_color_primaries_str(ktx2_df_color_primaries p)
{
    switch (p)
    {
    case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED";
    case KTX2_DF_PRIMARIES_BT709: return "BT709";
    case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU";
    case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE";
    case KTX2_DF_PRIMARIES_BT2020: return "BT2020";
    case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ";
    case KTX2_DF_PRIMARIES_ACES: return "ACES";
    case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC";
    case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953";
    case KTX2_DF_PRIMARIES_PAL525: return "PAL525";
    case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3";
    case KTX2_DF_PRIMARIES_ADOBERGB: return "ADOBERGB";
    default: break;
    }
    return "?";
}

// Information about a single 2D texture "image" in a KTX2 file.
struct ktx2_image_level_info
{
    // The mipmap level index (0=largest), texture array layer index, and cubemap face index of the image.
    uint32_t m_level_index;
    uint32_t m_layer_index;
    uint32_t m_face_index;

    // The image's ACTUAL (or the original source image's) width/height in pixels, which may not be divisible by the block size (4-12 pixels).
    uint32_t m_orig_width;
    uint32_t m_orig_height;

    // The image's physical width/height, which will always be divisible by the format's block size (4-12 pixels).
    uint32_t m_width;
    uint32_t m_height;

    // The texture's dimensions in 4x4-12x12 texel blocks.
    uint32_t m_num_blocks_x;
    uint32_t m_num_blocks_y;

    // The format's block width/height (4-12).
    uint32_t m_block_width;
    uint32_t m_block_height;

    // The total number of blocks
    uint32_t m_total_blocks;

    // true if the image has alpha data
    bool m_alpha_flag;

    // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames.
    bool m_iframe_flag;
};

// Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.)
struct ktx2_transcoder_state
{
    basist::basisu_transcoder_state m_transcoder_state;
    basisu::uint8_vec m_level_uncomp_data;
    int m_uncomp_data_level_index;

    void clear()
    {
        m_transcoder_state.clear();
        m_level_uncomp_data.clear();
        m_uncomp_data_level_index = -1;
    }
};

// This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data.
// It does not support 1D or 3D textures.
// It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files.
// It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files.
// DFD (Data Format Descriptor) parsing is purposely as simple as possible.
// If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd().
class ktx2_transcoder
{
public:
    ktx2_transcoder();

    // Frees all allocations, resets object.
    void clear();

    // init() parses the KTX2 header, level index array, DFD, and key values, but nothing else.
    // Importantly, it does not parse or decompress the ETC1S global supercompressed data, so some things (like which frames are I/P-Frames) won't be available until start_transcoding() is called.
    // This method holds a pointer to the file data until clear() is called.
    bool init(const void* pData, uint32_t data_size);

    // Returns the data/size passed to init().
    const uint8_t* get_data() const { return m_pData; }
    uint32_t get_data_size() const { return m_data_size; }

    // Returns the KTX2 header. Valid after init().
    const ktx2_header& get_header() const { return m_header; }

    // Returns the KTX2 level index array. There will be one entry for each mipmap level. Valid after init().
    const basisu::vector<ktx2_level_index>& get_level_index() const { return m_levels; }

    // Returns the texture's width in texels. Always non-zero, might not be divisible by the block size. Valid after init().
    uint32_t get_width() const { return m_header.m_pixel_width; }

    // Returns the texture's height in texels. Always non-zero, might not be divisible by the block size. Valid after init().
    uint32_t get_height() const { return m_header.m_pixel_height; }

    // Returns the texture's number of mipmap levels. Always returns 1 or higher. Valid after init().
    uint32_t get_levels() const { return m_header.m_level_count; }

    // Returns the number of faces. Returns 1 for 2D textures and or 6 for cubemaps. Valid after init().
    uint32_t get_faces() const { return m_header.m_face_count; }

    // Returns 0 or the number of layers in the texture array or texture video. Valid after init().
    uint32_t get_layers() const { return m_header.m_layer_count; }

    // Returns cETC1S, cUASTC4x4, cUASTC_HDR_4x4, cASTC_HDR_6x6, cUASTC_HDR_6x6_INTERMEDIATE, etc. Valid after init().
    basist::basis_tex_format get_basis_tex_format() const { return m_format; }

    // ETC1S LDR 4x4
    bool is_etc1s() const { return get_basis_tex_format() == basist::basis_tex_format::cETC1S; }

    // UASTC LDR 4x4 (only)
    bool is_uastc() const { return get_basis_tex_format() == basist::basis_tex_format::cUASTC_LDR_4x4; }

    // Is ASTC HDR 4x4 or 6x6
    bool is_hdr() const
    {
        return basis_tex_format_is_hdr(get_basis_tex_format());
    }

    bool is_ldr() const
    {
        return !is_hdr();
    }

    // is UASTC HDR 4x4 (which is also standard ASTC HDR 4x4 data)
    bool is_hdr_4x4() const
    {
        return (get_basis_tex_format() == basist::basis_tex_format::cUASTC_HDR_4x4);
    }

    // is ASTC HDR 6x6 or UASTC HDR 6x6 intermediate (only)
    bool is_hdr_6x6() const
    {
        return (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6) || (get_basis_tex_format() == basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE);
    }

    // is ASTC LDR 4x4-12x12 (only)
    bool is_astc_ldr() const { return basis_tex_format_is_astc_ldr(get_basis_tex_format()); }

    // is XUASTC LDR 4x4-12x12 (only)
    bool is_xuastc_ldr() const { return basis_tex_format_is_xuastc_ldr(get_basis_tex_format()); }

    uint32_t get_block_width() const { return basis_tex_format_get_block_width(get_basis_tex_format()); }
    uint32_t get_block_height() const { return basis_tex_format_get_block_height(get_basis_tex_format()); }

    // Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init().
    uint32_t get_has_alpha() const { return m_has_alpha; }

    // Returns the entire Data Format Descriptor (DFD) from the KTX2 file. Valid after init().
    // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#_the_khronos_data_format_descriptor_overview
    const basisu::uint8_vec& get_dfd() const { return m_dfd; }

    // Some basic DFD accessors. Valid after init().
    uint32_t get_dfd_color_model() const { return m_dfd_color_model; }

    // Returns the DFD color primary.
    // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum.
    ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; }

    // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB.
    uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; }

    bool is_srgb() const { return (get_dfd_transfer_func() == KTX2_KHR_DF_TRANSFER_SRGB); }

    uint32_t get_dfd_flags() const { return m_dfd_flags; }

    // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel).
    uint32_t get_dfd_total_samples() const { return m_dfd_samples; }

    // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two.
    // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that.
    // It's up to the caller to decide what to do if the value isn't in the enum.
    ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; }
    ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; }

    // Key value field data.
    struct key_value
    {
        // The key field is UTF8 and always zero terminated.
        // In memory we always append a zero terminator to the key.
        basisu::uint8_vec m_key;

        // The value may be empty. In the KTX2 file it consists of raw bytes which may or may not be zero terminated.
        // In memory we always append a zero terminator to the value.
        basisu::uint8_vec m_value;

        bool operator< (const key_value& rhs) const { return strcmp((const char*)m_key.data(), (const char *)rhs.m_key.data()) < 0; }
    };
    typedef basisu::vector<key_value> key_value_vec;

    // Returns the array of key-value entries. This may be empty. Valid after init().
    // The order of key values fields in this array exactly matches the order they were stored in the file. The keys are supposed to be sorted by their Unicode code points.
    const key_value_vec& get_key_values() const { return m_key_values; }

    const basisu::uint8_vec *find_key(const std::string& key_name) const;

    // Low-level ETC1S specific accessors

    // Returns the ETC1S global supercompression data header, which is only valid after start_transcoding() is called.
    const ktx2_etc1s_global_data_header& get_etc1s_header() const { return m_etc1s_header; }

    // Returns the array of ETC1S image descriptors, which is only valid after start_transcoding() is called.
    const basisu::vector<ktx2_etc1s_image_desc>& get_etc1s_image_descs() const { return m_etc1s_image_descs; }

    const basisu::vector<ktx2_slice_offset_len_desc>& get_slice_offset_len_descs() const { return m_slice_offset_len_descs; }

    // Must have called start_transcoding() first
    uint32_t get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const;

    // is_video() is only valid after start_transcoding() is called.
    // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames.
    bool is_video() const { return m_is_video; }

    // Defaults to 0, only non-zero if the key existed in the source KTX2 file.
    float get_ldr_hdr_upconversion_nit_multiplier() const { return m_ldr_hdr_upconversion_nit_multiplier; }

    // start_transcoding() MUST be called before calling transcode_image_level().
    // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively.
    bool start_transcoding();

    // get_image_level_info() be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called.
    // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc.
    bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const;

    // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file.
    // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level().
    // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is
    // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit.
    // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames.
    // By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct.
    bool transcode_image_level(
        uint32_t level_index, uint32_t layer_index, uint32_t face_index,
        void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
        basist::transcoder_texture_format fmt,
        uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1,
        ktx2_transcoder_state *pState = nullptr);

private:
    const uint8_t* m_pData;
    uint32_t m_data_size;

    ktx2_header m_header;
    basisu::vector<ktx2_level_index> m_levels;
    basisu::uint8_vec m_dfd;
    key_value_vec m_key_values;

    ktx2_etc1s_global_data_header m_etc1s_header;
    basisu::vector<ktx2_etc1s_image_desc> m_etc1s_image_descs;
    basisu::vector<ktx2_slice_offset_len_desc> m_slice_offset_len_descs;

    basist::basis_tex_format m_format;

    uint32_t m_dfd_color_model;
    ktx2_df_color_primaries m_dfd_color_prims;

    // KTX2_KHR_DF_TRANSFER_LINEAR vs. KTX2_KHR_DF_TRANSFER_SRGB (for XUASTC LDR: which profile was used during encoding)
    uint32_t m_dfd_transfer_func;

    uint32_t m_dfd_flags;
    uint32_t m_dfd_samples;
    ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1;

    basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder;
    basist::basisu_lowlevel_uastc_ldr_4x4_transcoder m_uastc_ldr_transcoder;
    basist::basisu_lowlevel_xuastc_ldr_transcoder m_xuastc_ldr_transcoder;
    basist::basisu_lowlevel_uastc_hdr_4x4_transcoder m_uastc_hdr_transcoder;
    basist::basisu_lowlevel_astc_hdr_6x6_transcoder m_astc_hdr_6x6_transcoder;
    basist::basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder m_astc_hdr_6x6_intermediate_transcoder;

    ktx2_transcoder_state m_def_transcoder_state;

    bool m_has_alpha;
    bool m_is_video;
    float m_ldr_hdr_upconversion_nit_multiplier;

    bool decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data);
    bool read_slice_offset_len_global_data();
    bool decompress_etc1s_global_data();
    bool read_key_values();
};

// Replaces if the key already exists
inline void ktx2_add_key_value(ktx2_transcoder::key_value_vec& key_values, const std::string& key, const std::string& val)
{
    assert(key.size());

    basist::ktx2_transcoder::key_value* p = nullptr;

    // Try to find an existing key
    for (size_t i = 0; i < key_values.size(); i++)
    {
        if (strcmp((const char*)key_values[i].m_key.data(), key.c_str()) == 0)
        {
            p = &key_values[i];
            break;
        }
    }

    if (!p)
        p = key_values.enlarge(1);

    p->m_key.resize(0);
    p->m_value.resize(0);

    // +1 so the stored key/value are always zero terminated (resize() zero-fills the tail).
    p->m_key.resize(key.size() + 1);
    memcpy(p->m_key.data(), key.c_str(), key.size());

    p->m_value.resize(val.size() + 1);
    if (val.size())
        memcpy(p->m_value.data(), val.c_str(), val.size());
}

#endif // BASISD_SUPPORT_KTX2

// Returns true if the transcoder was compiled with KTX2 support.
bool basisu_transcoder_supports_ktx2();

// Returns true if the transcoder was compiled with Zstandard support.
+ bool basisu_transcoder_supports_ktx2_zstd(); + +} // namespace basisu + diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder_internal.h b/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder_internal.h new file mode 100644 index 000000000..c830795aa --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder_internal.h @@ -0,0 +1,3263 @@ +// basisu_transcoder_internal.h - Universal texture format transcoder library. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#pragma once

#ifdef _MSC_VER
#pragma warning (disable: 4127) // conditional expression is constant
#endif

// Transcoder library version history:
// v1.50: Added UASTC HDR 4x4 support
// v1.60: Added RDO ASTC HDR 6x6 and intermediate support
// v1.65: Added ASTC LDR 4x4-12x12 and XUASTC LDR 4x4-12x12
// v2.00: Added unified effort/quality options across all formats, fast direct transcoding of XUASTC 4x4/6x6/8x6 to BC7, adaptive deblocking, ZStd or arithmetic profiles, weight grid DCT
#define BASISD_LIB_VERSION 200
#define BASISD_VERSION_STRING "02.00"

#ifdef _DEBUG
#define BASISD_BUILD_DEBUG
#else
#define BASISD_BUILD_RELEASE
#endif

#include "basisu.h"
#include "basisu_astc_helpers.h"

// Marsaglia multiply-with-carry style PRNG step; the macro expands against a variable named 'z' in the calling scope.
#define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16))

namespace basisu
{
	// When true, internal debug printf output is enabled (defined in the transcoder .cpp).
	extern bool g_debug_printf;
}

namespace basist
{
	// Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats).
	// You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices.
	enum class block_format
	{
		cETC1,							// ETC1S RGB
		cETC2_RGBA,						// full ETC2 EAC RGBA8 block
		cBC1,							// DXT1 RGB
		cBC3,							// BC4 block followed by a four color BC1 block
		cBC4,							// DXT5A (alpha block only)
		cBC5,							// two BC4 blocks
		cPVRTC1_4_RGB,					// opaque-only PVRTC1 4bpp
		cPVRTC1_4_RGBA,					// PVRTC1 4bpp RGBA
		cBC7,							// Full BC7 block, any mode
		cBC7_M5_COLOR,					// RGB BC7 mode 5 color (writes an opaque mode 5 block)
		cBC7_M5_ALPHA,					// alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.)
		cETC2_EAC_A8,					// alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format)
		cASTC_LDR_4x4,					// ASTC LDR 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB decode mode is not enabled when outputting ASTC LDR for ETC1S/UASTC LDR 4x4
										// data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking.

		cATC_RGB,
		cATC_RGBA_INTERPOLATED_ALPHA,
		cFXT1_RGB,						// Opaque-only, has oddball 8x4 pixel block size

		cPVRTC2_4_RGB,
		cPVRTC2_4_RGBA,

		cETC2_EAC_R11,
		cETC2_EAC_RG11,

		cIndices,						// Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits)

		cRGB32,							// Writes RGB components to 32bpp output pixels
		cRGBA32,						// Writes RGBA components to 32bpp output pixels
		cA32,							// Writes alpha component to 32bpp output pixels

		cRGB565,
		cBGR565,

		cRGBA4444_COLOR,
		cRGBA4444_ALPHA,
		cRGBA4444_COLOR_OPAQUE,
		cRGBA4444,
		cRGBA_HALF,
		cRGB_HALF,
		cRGB_9E5,

		cUASTC_4x4,						// LDR, universal
		cUASTC_HDR_4x4,					// HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
		cBC6H,

		cASTC_HDR_4x4,
		cASTC_HDR_6x6,

		// The remaining ASTC LDR block sizes.
		cASTC_LDR_5x4,
		cASTC_LDR_5x5,
		cASTC_LDR_6x5,
		cASTC_LDR_6x6,
		cASTC_LDR_8x5,
		cASTC_LDR_8x6,
		cASTC_LDR_10x5,
		cASTC_LDR_10x6,
		cASTC_LDR_8x8,
		cASTC_LDR_10x8,
		cASTC_LDR_10x10,
		cASTC_LDR_12x10,
		cASTC_LDR_12x12,

		cTotalBlockFormats
	};

	// Returns true if the given low-level block format holds HDR texel data (ASTC HDR variants or BC6H).
	inline bool block_format_is_hdr(block_format fmt)
	{
		switch (fmt)
		{
		case block_format::cUASTC_HDR_4x4:
		case block_format::cBC6H:
		case block_format::cASTC_HDR_4x4:
		case block_format::cASTC_HDR_6x6:
			return true;
		default:
			break;
		}

		return false;
	}

	// LDR or HDR ASTC?
+ inline bool block_format_is_astc(block_format fmt) + { + switch (fmt) + { + case block_format::cASTC_LDR_4x4: + case block_format::cASTC_LDR_5x4: + case block_format::cASTC_LDR_5x5: + case block_format::cASTC_LDR_6x5: + case block_format::cASTC_LDR_6x6: + case block_format::cASTC_LDR_8x5: + case block_format::cASTC_LDR_8x6: + case block_format::cASTC_LDR_10x5: + case block_format::cASTC_LDR_10x6: + case block_format::cASTC_LDR_8x8: + case block_format::cASTC_LDR_10x8: + case block_format::cASTC_LDR_10x10: + case block_format::cASTC_LDR_12x10: + case block_format::cASTC_LDR_12x12: + case block_format::cASTC_HDR_4x4: + case block_format::cASTC_HDR_6x6: + return true; + default: + break; + } + + return false; + } + + inline uint32_t get_block_width(block_format fmt) + { + switch (fmt) + { + case block_format::cFXT1_RGB: + return 8; + case block_format::cASTC_HDR_6x6: + return 6; + + case block_format::cASTC_LDR_5x4: return 5; + case block_format::cASTC_LDR_5x5: return 5; + case block_format::cASTC_LDR_6x5: return 6; + case block_format::cASTC_LDR_6x6: return 6; + case block_format::cASTC_LDR_8x5: return 8; + case block_format::cASTC_LDR_8x6: return 8; + case block_format::cASTC_LDR_10x5: return 10; + case block_format::cASTC_LDR_10x6: return 10; + case block_format::cASTC_LDR_8x8: return 8; + case block_format::cASTC_LDR_10x8: return 10; + case block_format::cASTC_LDR_10x10: return 10; + case block_format::cASTC_LDR_12x10: return 12; + case block_format::cASTC_LDR_12x12: return 12; + + default: + break; + } + return 4; + } + + inline uint32_t get_block_height(block_format fmt) + { + switch (fmt) + { + case block_format::cASTC_HDR_6x6: + return 6; + + case block_format::cASTC_LDR_5x5: return 5; + case block_format::cASTC_LDR_6x5: return 5; + case block_format::cASTC_LDR_6x6: return 6; + case block_format::cASTC_LDR_8x5: return 5; + case block_format::cASTC_LDR_8x6: return 6; + case block_format::cASTC_LDR_10x5: return 5; + case block_format::cASTC_LDR_10x6: return 6; 
+ case block_format::cASTC_LDR_8x8: return 8; + case block_format::cASTC_LDR_10x8: return 8; + case block_format::cASTC_LDR_10x10: return 10; + case block_format::cASTC_LDR_12x10: return 10; + case block_format::cASTC_LDR_12x12: return 12; + + default: + break; + } + return 4; + } + + const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31; + const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21; + const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9; + const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3; + + const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1; + const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1; + const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3; + const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4; + + const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds); + const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1; + const uint32_t NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS; + const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); + + uint16_t crc16(const void *r, size_t size, uint16_t crc); + + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); + + template + struct bit_hasher + { + inline std::size_t operator()(const Key& k) const + { + return hash_hsieh(reinterpret_cast(&k), sizeof(k)); + } + }; + + struct string_hasher + { + inline std::size_t operator()(const std::string& k) const + { + size_t l = k.size(); + if (!l) + return 0; + return hash_hsieh(reinterpret_cast(k.c_str()), l); + } + }; + + class huffman_decoding_table + { + friend class bitwise_decoder; + + public: + huffman_decoding_table() + { + } + + void clear() 
+ { + basisu::clear_vector(m_code_sizes); + basisu::clear_vector(m_lookup); + basisu::clear_vector(m_tree); + } + + bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits) + { + if (!total_syms) + { + clear(); + return true; + } + + m_code_sizes.resize(total_syms); + memcpy(&m_code_sizes[0], pCode_sizes, total_syms); + + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; + + m_lookup.resize(0); + m_lookup.resize(huffman_fast_lookup_size); + + m_tree.resize(0); + m_tree.resize(total_syms * 2); + + uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; + basisu::clear_obj(syms_using_codesize); + for (uint32_t i = 0; i < total_syms; i++) + { + if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize) + return false; + syms_using_codesize[pCode_sizes[i]]++; + } + + uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; + next_code[0] = next_code[1] = 0; + + uint32_t used_syms = 0, total = 0; + for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++) + { + used_syms += syms_using_codesize[i]; + next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1)); + } + + if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U)) + return false; + + for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index) + { + uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index]; + if (!code_size) + continue; + + cur_code = next_code[code_size]++; + + for (l = code_size; l > 0; l--, cur_code >>= 1) + rev_code = (rev_code << 1) | (cur_code & 1); + + if (code_size <= fast_lookup_bits) + { + uint32_t k = (code_size << 16) | sym_index; + while (rev_code < huffman_fast_lookup_size) + { + if (m_lookup[rev_code] != 0) + { + // Supplied codesizes can't create a valid prefix code. 
+ return false; + } + + m_lookup[rev_code] = k; + rev_code += (1 << code_size); + } + continue; + } + + int tree_cur; + if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)])) + { + const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1); + if (m_lookup[idx] != 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + m_lookup[idx] = tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + + if (tree_cur >= 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + rev_code >>= (fast_lookup_bits - 1); + + for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + + const int idx = -tree_cur - 1; + if (idx < 0) + return false; + else if (idx >= (int)m_tree.size()) + m_tree.resize(idx + 1); + + if (!m_tree[idx]) + { + m_tree[idx] = (int16_t)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + else + { + tree_cur = m_tree[idx]; + if (tree_cur >= 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + } + } + + tree_cur -= ((rev_code >>= 1) & 1); + + const int idx = -tree_cur - 1; + if (idx < 0) + return false; + else if (idx >= (int)m_tree.size()) + m_tree.resize(idx + 1); + + if (m_tree[idx] != 0) + { + // Supplied codesizes can't create a valid prefix code. 
+ return false; + } + + m_tree[idx] = (int16_t)sym_index; + } + + return true; + } + + const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } + const basisu::int_vec &get_lookup() const { return m_lookup; } + const basisu::int16_vec &get_tree() const { return m_tree; } + + bool is_valid() const { return m_code_sizes.size() > 0; } + + private: + basisu::uint8_vec m_code_sizes; + basisu::int_vec m_lookup; + basisu::int16_vec m_tree; + }; + + class bitwise_decoder + { + public: + bitwise_decoder() : + m_buf_size(0), + m_pBuf(nullptr), + m_pBuf_start(nullptr), + m_pBuf_end(nullptr), + m_bit_buf(0), + m_bit_buf_size(0) + { + } + + void clear() + { + m_buf_size = 0; + m_pBuf = nullptr; + m_pBuf_start = nullptr; + m_pBuf_end = nullptr; + m_bit_buf = 0; + m_bit_buf_size = 0; + } + + bool init(const uint8_t *pBuf, uint32_t buf_size) + { + if ((!pBuf) && (buf_size)) + return false; + + m_buf_size = buf_size; + m_pBuf = pBuf; + m_pBuf_start = pBuf; + m_pBuf_end = pBuf + buf_size; + m_bit_buf = 0; + m_bit_buf_size = 0; + return true; + } + + void stop() + { + } + + inline uint32_t peek_bits(uint32_t num_bits) + { + if (!num_bits) + return 0; + + assert(num_bits <= 25); + + while (m_bit_buf_size < num_bits) + { + uint32_t c = 0; + if (m_pBuf < m_pBuf_end) + c = *m_pBuf++; + + m_bit_buf |= (c << m_bit_buf_size); + m_bit_buf_size += 8; + assert(m_bit_buf_size <= 32); + } + + return m_bit_buf & ((1 << num_bits) - 1); + } + + void remove_bits(uint32_t num_bits) + { + assert(m_bit_buf_size >= num_bits); + + m_bit_buf >>= num_bits; + m_bit_buf_size -= num_bits; + } + + uint32_t get_bits(uint32_t num_bits) + { + if (num_bits > 25) + { + assert(num_bits <= 32); + + const uint32_t bits0 = peek_bits(25); + m_bit_buf >>= 25; + m_bit_buf_size -= 25; + num_bits -= 25; + + const uint32_t bits = peek_bits(num_bits); + m_bit_buf >>= num_bits; + m_bit_buf_size -= num_bits; + + return bits0 | (bits << 25); + } + + const uint32_t bits = peek_bits(num_bits); + + m_bit_buf >>= 
num_bits; + m_bit_buf_size -= num_bits; + + return bits; + } + + uint32_t decode_truncated_binary(uint32_t n) + { + assert(n >= 2); + + const uint32_t k = basisu::floor_log2i(n); + const uint32_t u = (1 << (k + 1)) - n; + + uint32_t result = get_bits(k); + + if (result >= u) + result = ((result << 1) | get_bits(1)) - u; + + return result; + } + + uint32_t decode_rice(uint32_t m) + { + assert(m); + + uint32_t q = 0; + for (;;) + { + uint32_t k = peek_bits(16); + + uint32_t l = 0; + while (k & 1) + { + l++; + k >>= 1; + } + + q += l; + + remove_bits(l); + + if (l < 16) + break; + } + + return (q << m) + (get_bits(m + 1) >> 1); + } + + inline uint32_t decode_vlc(uint32_t chunk_bits) + { + assert(chunk_bits); + + const uint32_t chunk_size = 1 << chunk_bits; + const uint32_t chunk_mask = chunk_size - 1; + + uint32_t v = 0; + uint32_t ofs = 0; + + for ( ; ; ) + { + uint32_t s = get_bits(chunk_bits + 1); + v |= ((s & chunk_mask) << ofs); + ofs += chunk_bits; + + if ((s & chunk_size) == 0) + break; + + if (ofs >= 32) + { + assert(0); + break; + } + } + + return v; + } + + inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits) + { + assert(ct.m_code_sizes.size()); + + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; + + while (m_bit_buf_size < 16) + { + uint32_t c = 0; + if (m_pBuf < m_pBuf_end) + c = *m_pBuf++; + + m_bit_buf |= (c << m_bit_buf_size); + m_bit_buf_size += 8; + assert(m_bit_buf_size <= 32); + } + + int code_len; + + int sym; + if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0) + { + code_len = sym >> 16; + sym &= 0xFFFF; + } + else + { + code_len = fast_lookup_bits; + do + { + sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1 + } while (sym < 0); + } + + m_bit_buf >>= code_len; + m_bit_buf_size -= code_len; + + return sym; + } + + bool read_huffman_table(huffman_decoding_table &ct) + { + ct.clear(); + + const uint32_t 
total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2); + + if (!total_used_syms) + return true; + if (total_used_syms > basisu::cHuffmanMaxSyms) + return false; + + uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes]; + basisu::clear_obj(code_length_code_sizes); + + const uint32_t num_codelength_codes = get_bits(5); + if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes)) + return false; + + for (uint32_t i = 0; i < num_codelength_codes; i++) + code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast(get_bits(3)); + + huffman_decoding_table code_length_table; + if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes)) + return false; + + if (!code_length_table.is_valid()) + return false; + + basisu::uint8_vec code_sizes(total_used_syms); + + uint32_t cur = 0; + while (cur < total_used_syms) + { + int c = decode_huffman(code_length_table); + + if (c <= 16) + code_sizes[cur++] = static_cast(c); + else if (c == basisu::cHuffmanSmallZeroRunCode) + cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin; + else if (c == basisu::cHuffmanBigZeroRunCode) + cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin; + else + { + if (!cur) + return false; + + uint32_t l; + if (c == basisu::cHuffmanSmallRepeatCode) + l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin; + else + l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin; + + const uint8_t prev = code_sizes[cur - 1]; + if (prev == 0) + return false; + do + { + if (cur >= total_used_syms) + return false; + code_sizes[cur++] = prev; + } while (--l > 0); + } + } + + if (cur != total_used_syms) + return false; + + return ct.init(total_used_syms, &code_sizes[0]); + } + + size_t get_bits_remaining() const + { + size_t total_bytes_remaining = m_pBuf_end - m_pBuf; + return 
total_bytes_remaining * 8 + m_bit_buf_size; + } + + private: + uint32_t m_buf_size; + const uint8_t *m_pBuf; + const uint8_t *m_pBuf_start; + const uint8_t *m_pBuf_end; + + uint32_t m_bit_buf; + uint32_t m_bit_buf_size; + }; + + class simplified_bitwise_decoder + { + public: + simplified_bitwise_decoder() : + m_pBuf(nullptr), + m_pBuf_end(nullptr), + m_bit_buf(0) + { + } + + void clear() + { + m_pBuf = nullptr; + m_pBuf_end = nullptr; + m_bit_buf = 0; + } + + bool init(const uint8_t* pBuf, size_t buf_size) + { + if ((!pBuf) && (buf_size)) + return false; + + m_pBuf = pBuf; + m_pBuf_end = pBuf + buf_size; + m_bit_buf = 1; + return true; + } + + bool init(const basisu::uint8_vec& buf) + { + return init(buf.data(), buf.size()); + } + + // num_bits must be 1, 2, 4 or 8 and codes cannot cross bytes + inline uint32_t get_bits(uint32_t num_bits) + { + assert(m_pBuf); + + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0); + + const uint32_t mask = (1 << num_bits) - 1; + const uint32_t res = m_bit_buf & mask; + m_bit_buf >>= num_bits; + assert(m_bit_buf >= 1); + + return res; + } + + inline uint32_t get_bits1() + { + assert(m_pBuf); + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0); + const uint32_t res = m_bit_buf & 1; + m_bit_buf >>= 1; + assert(m_bit_buf >= 1); + return res; + } + + inline uint32_t get_bits2() + { + assert(m_pBuf); + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0); + const uint32_t res = m_bit_buf & 3; + m_bit_buf >>= 2; + assert(m_bit_buf >= 1); + return res; + } + + inline uint32_t get_bits4() + { + assert(m_pBuf); + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0); + const uint32_t res = m_bit_buf & 15; + m_bit_buf >>= 4; + assert(m_bit_buf >= 1); + return res; + } + + // No bitbuffer, can only ever retrieve bytes correctly. + inline uint32_t get_bits8() + { + assert(m_pBuf); + return (m_pBuf < m_pBuf_end) ? 
*m_pBuf++ : 0; + } + + const uint8_t* m_pBuf; + const uint8_t* m_pBuf_end; + uint32_t m_bit_buf; + }; + + inline uint32_t basisd_rand(uint32_t seed) + { + if (!seed) + seed++; + uint32_t z = seed; + BASISD_znew; + return z; + } + + // Returns random number in [0,limit). Max limit is 0xFFFF. + inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit) + { + seed = basisd_rand(seed); + return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16; + } + + class approx_move_to_front + { + public: + approx_move_to_front(uint32_t n) + { + init(n); + } + + void init(uint32_t n) + { + m_values.resize(n); + m_rover = n / 2; + } + + const basisu::int_vec& get_values() const { return m_values; } + basisu::int_vec& get_values() { return m_values; } + + uint32_t size() const { return (uint32_t)m_values.size(); } + + const int& operator[] (uint32_t index) const { return m_values[index]; } + int operator[] (uint32_t index) { return m_values[index]; } + + void add(int new_value) + { + m_values[m_rover++] = new_value; + if (m_rover == m_values.size()) + m_rover = (uint32_t)m_values.size() / 2; + } + + void use(uint32_t index) + { + if (index) + { + //std::swap(m_values[index / 2], m_values[index]); + int x = m_values[index / 2]; + int y = m_values[index]; + m_values[index / 2] = y; + m_values[index] = x; + } + } + + // returns -1 if not found + int find(int value) const + { + for (uint32_t i = 0; i < m_values.size(); i++) + if (m_values[i] == value) + return i; + return -1; + } + + void reset() + { + const uint32_t n = (uint32_t)m_values.size(); + + m_values.clear(); + + init(n); + } + + private: + basisu::int_vec m_values; + uint32_t m_rover; + }; + + struct decoder_etc_block; + + inline uint8_t clamp255(int32_t i) + { + return (uint8_t)((i & 0xFFFFFF00U) ? 
(~(i >> 31)) : i); + } + + enum eNoClamp + { + cNoClamp = 0 + }; + + struct color32 + { + union + { + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + + uint8_t c[4]; + + uint32_t m; + }; + + //color32() { } + color32() = default; + + color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } + + void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } + + void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } + void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + + void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } + + uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } + uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } + + bool operator== (const color32&rhs) const { return m == rhs.m; } + + static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } + }; + + struct endpoint + { + color32 m_color5; + uint8_t m_inten5; + bool operator== (const endpoint& rhs) const + { + return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5); + } + bool operator!= (const endpoint& rhs) const { return !(*this == rhs); } + }; 
+ + // This duplicates key functionality in the encoder library's color_rgba class. Porting and retesting code that uses it to color32 is impractical. + class color_rgba + { + public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() + { + static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + static_assert(sizeof(*this) == sizeof(color32), "sizeof(*this) != sizeof(basist::color32)"); + } + + inline color_rgba(const color32& other) : + r(other.r), + g(other.g), + b(other.b), + a(other.a) + { + } + + color_rgba& operator= (const basist::color32& rhs) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + a = rhs.a; + return *this; + } + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + { + set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + } + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba& set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba& set(int y) + { + m_comps[0] = static_cast(basisu::clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba& set(int y, int na) + { + m_comps[0] = static_cast(basisu::clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(basisu::clamp(na, 0, 255)); + return *this; + } + + inline color_rgba& set(int sr, int sg, int sb, int sa) + { + m_comps[0] = static_cast(basisu::clamp(sr, 0, 255)); + m_comps[1] = 
static_cast(basisu::clamp(sg, 0, 255)); + m_comps[2] = static_cast(basisu::clamp(sb, 0, 255)); + m_comps[3] = static_cast(basisu::clamp(sa, 0, 255)); + return *this; + } + + inline color_rgba& set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(basisu::clamp(sr, 0, 255)); + m_comps[1] = static_cast(basisu::clamp(sg, 0, 255)); + m_comps[2] = static_cast(basisu::clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba& set_rgb(const color_rgba& other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t& operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } + inline uint8_t& operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba& rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba& rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba& rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline color32 get_color32() const + { + return color32(r, g, b, a); + } + + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + }; + + struct selector + { + // Plain selectors (2-bits per value) + uint8_t m_selectors[4]; + + // ETC1 selectors + uint8_t m_bytes[4]; + + uint8_t m_lo_selector, m_hi_selector; + uint8_t m_num_unique_selectors; + bool operator== (const selector& rhs) const + { + return (m_selectors[0] == rhs.m_selectors[0]) && + (m_selectors[1] == rhs.m_selectors[1]) 
&& + (m_selectors[2] == rhs.m_selectors[2]) && + (m_selectors[3] == rhs.m_selectors[3]); + } + bool operator!= (const selector& rhs) const + { + return !(*this == rhs); + } + + void init_flags() + { + uint32_t hist[4] = { 0, 0, 0, 0 }; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = get_selector(x, y); + hist[s]++; + } + } + + m_lo_selector = 3; + m_hi_selector = 0; + m_num_unique_selectors = 0; + + for (uint32_t i = 0; i < 4; i++) + { + if (hist[i]) + { + m_num_unique_selectors++; + if (i < m_lo_selector) m_lo_selector = static_cast(i); + if (i > m_hi_selector) m_hi_selector = static_cast(i); + } + } + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + return (m_selectors[y] >> (x * 2)) & 3; + } + + void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 }; + + assert((x | y | val) < 4); + + m_selectors[y] &= ~(3 << (x * 2)); + m_selectors[y] |= (val << (x * 2)); + + const uint32_t etc1_bit_index = x * 4 + y; + + uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)]; + + const uint32_t byte_bit_ofs = etc1_bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t etc1_val = s_selector_index_to_etc1[val]; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + }; + + bool basis_block_format_is_uncompressed(block_format tex_type); + + //------------------------------------ + + typedef uint16_t half_float; + + const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number + const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number + const double MAX_HALF_FLOAT = 65504.0; // largest normal number + const uint32_t 
MAX_HALF_FLOAT_AS_INT_BITS = 0x7BFF; // the half float rep for 65504.0 + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert((num_bits >= 1) && (num_bits <= 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + + inline bool is_half_inf_or_nan(half_float v) + { + return get_bits(v, 10, 14) == 31; + } + + inline bool is_half_denorm(half_float v) + { + int e = (v >> 10) & 31; + return !e; + } + + inline int get_half_exp(half_float v) + { + int e = ((v >> 10) & 31); + return e ? (e - 15) : -14; + } + + inline int get_half_mantissa(half_float v) + { + if (is_half_denorm(v)) + return v & 0x3FF; + return (v & 0x3FF) | 0x400; + } + + inline float get_half_mantissaf(half_float v) + { + return ((float)get_half_mantissa(v)) / 1024.0f; + } + + inline int get_half_sign(half_float v) + { + return v ? ((v & 0x8000) ? -1 : 1) : 0; + } + + inline bool half_is_signed(half_float v) + { + return (v & 0x8000) != 0; + } + +#if 0 + int hexp = get_half_exp(Cf); + float hman = get_half_mantissaf(Cf); + int hsign = get_half_sign(Cf); + float k = powf(2.0f, hexp) * hman * hsign; + if (is_half_inf_or_nan(Cf)) + k = std::numeric_limits::quiet_NaN(); +#endif + + half_float float_to_half(float val); + + inline float half_to_float(half_float hval) + { + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + 
assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; + } + + // Originally from bc6h_enc.h + + void bc6h_enc_init(); + + const uint32_t MAX_BLOG16_VAL = 0xFFFF; + + // BC6H internals + const uint32_t NUM_BC6H_MODES = 14; + const uint32_t BC6H_LAST_MODE_INDEX = 13; + const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights + const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32; + + extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b + + struct bc6h_bit_layout + { + int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index) + int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d) + int8_t m_last_bit; + int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed + }; + + const uint32_t MAX_BC6H_LAYOUT_INDEX = 25; + extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX]; + + extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x] + + extern const uint8_t g_bc6h_weight3[8]; + extern const uint8_t g_bc6h_weight4[16]; + + extern const int8_t g_bc6h_mode_lookup[32]; + + // Converts b16 to half float + inline half_float bc6h_blog16_to_half(uint32_t comp) + { + assert(comp <= 0xFFFF); + + // scale the magnitude by 31/64 + comp = (comp * 31u) >> 6u; + return (half_float)comp; + } + + const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF; + + // Inverts bc6h_blog16_to_half(). + // Returns the nearest blog16 given a half value. + inline uint32_t bc6h_half_to_blog16(half_float h) + { + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + return (h * 64 + 30) / 31; + } + + // Suboptimal, but very close. 
+ inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits) + { + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + return (h * 64 + 30) / (31 * (1 << (16 - num_bits))); + } + + struct bc6h_block + { + uint8_t m_bytes[16]; + }; + + void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index] + void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index] + bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]); + + struct bc6h_logical_block + { + uint32_t m_mode; + uint32_t m_partition_pattern; // must be 0 if 1 subset + uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed + uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 0 + + void clear() + { + basisu::clear_obj(*this); + } + }; + + void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk); + + namespace bc7_mode_5_encoder + { + void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode); + } + + namespace astc_6x6_hdr + { + extern uint8_t g_quantize_tables_preserve2[21 - 1][256]; // astc_helpers::TOTAL_ISE_RANGES=21 + extern uint8_t g_quantize_tables_preserve3[21 - 
1][256]; + } // namespace astc_6x6_hdr + +#if BASISD_SUPPORT_XUASTC + namespace astc_ldr_t + { + const uint32_t ARITH_HEADER_MARKER = 0x01; + const uint32_t ARITH_HEADER_MARKER_BITS = 5; + + const uint32_t FULL_ZSTD_HEADER_MARKER = 0x01; + const uint32_t FULL_ZSTD_HEADER_MARKER_BITS = 5; + + const uint32_t FINAL_SYNC_MARKER = 0xAF; + const uint32_t FINAL_SYNC_MARKER_BITS = 8; + + const uint32_t cMaxConfigReuseNeighbors = 3; + +#pragma pack(push, 1) + struct xuastc_ldr_arith_header + { + uint8_t m_flags; + basisu::packed_uint<4> m_arith_bytes_len; + basisu::packed_uint<4> m_mean0_bits_len; + basisu::packed_uint<4> m_mean1_bytes_len; + basisu::packed_uint<4> m_run_bytes_len; + basisu::packed_uint<4> m_coeff_bytes_len; + basisu::packed_uint<4> m_sign_bits_len; + basisu::packed_uint<4> m_weight2_bits_len; // 2-bit weights (4 per byte), up to BISE_4_LEVELS + basisu::packed_uint<4> m_weight3_bits_len; // 3-bit weights (2 per byte), up to BISE_8_LEVELS + basisu::packed_uint<4> m_weight4_bits_len; // 4-bit weights (2 per byte), up to BISE_16_LEVELS + basisu::packed_uint<4> m_weight8_bytes_len; // 8-bit weights (1 per byte), up to BISE_32_LEVELS + basisu::packed_uint<4> m_unused; // Future expansion + }; + + struct xuastc_ldr_full_zstd_header + { + uint8_t m_flags; + + // Control + basisu::packed_uint<4> m_raw_bits_len; // uncompressed + basisu::packed_uint<4> m_mode_bytes_len; + basisu::packed_uint<4> m_solid_dpcm_bytes_len; + + // Endpoint DPCM + basisu::packed_uint<4> m_endpoint_dpcm_reuse_indices_len; + basisu::packed_uint<4> m_use_bc_bits_len; + basisu::packed_uint<4> m_endpoint_dpcm_3bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_4bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_5bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_6bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_7bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_8bit_len; + + // Weight grid DCT + basisu::packed_uint<4> m_mean0_bits_len; + basisu::packed_uint<4> m_mean1_bytes_len; + 
basisu::packed_uint<4> m_run_bytes_len; + basisu::packed_uint<4> m_coeff_bytes_len; + basisu::packed_uint<4> m_sign_bits_len; + + // Weight DPCM + basisu::packed_uint<4> m_weight2_bits_len; // 2-bit weights (4 per byte), up to BISE_4_LEVELS + basisu::packed_uint<4> m_weight3_bits_len; // 3-bit weights (4 per byte), up to BISE_8_LEVELS + basisu::packed_uint<4> m_weight4_bits_len; // 4-bit weights (2 per byte), up to BISE_16_LEVELS + basisu::packed_uint<4> m_weight8_bytes_len; // 8-bit weights (1 per byte), up to BISE_32_LEVELS + + basisu::packed_uint<4> m_unused; // Future expansion + }; +#pragma pack(pop) + + const uint32_t DCT_RUN_LEN_EOB_SYM_INDEX = 64; + const uint32_t DCT_MAX_ARITH_COEFF_MAG = 255; + + const uint32_t DCT_MEAN_LEVELS0 = 9, DCT_MEAN_LEVELS1 = 33; + + const uint32_t PART_HASH_BITS = 6u; + const uint32_t PART_HASH_SIZE = 1u << PART_HASH_BITS; + + const uint32_t TM_HASH_BITS = 7u; + const uint32_t TM_HASH_SIZE = 1u << TM_HASH_BITS; + + typedef basisu::vector fvec; + + void init(); + + color_rgba blue_contract_enc(color_rgba orig, bool& did_clamp, int encoded_b); + color_rgba blue_contract_dec(int enc_r, int enc_g, int enc_b, int enc_a); + + struct astc_block_grid_config + { + uint16_t m_block_width, m_block_height; + uint16_t m_grid_width, m_grid_height; + + astc_block_grid_config() {} + + astc_block_grid_config(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height) + { + assert((block_width >= 4) && (block_width <= 12)); + assert((block_height >= 4) && (block_height <= 12)); + m_block_width = (uint16_t)block_width; + m_block_height = (uint16_t)block_height; + + assert((grid_width >= 2) && (grid_width <= block_width)); + assert((grid_height >= 2) && (grid_height <= block_height)); + m_grid_width = (uint16_t)grid_width; + m_grid_height = (uint16_t)grid_height; + } + + bool operator==(const astc_block_grid_config& other) const + { + return (m_block_width == other.m_block_width) && (m_block_height == 
other.m_block_height) && + (m_grid_width == other.m_grid_width) && (m_grid_height == other.m_grid_height); + } + }; + + struct astc_block_grid_data + { + float m_weight_gamma; + + // An unfortunate difference of containers, but in memory these matrices are both addressed as [r][c]. + basisu::vector2D m_upsample_matrix; + + basisu::vector m_downsample_matrix; + + astc_block_grid_data() {} + astc_block_grid_data(float weight_gamma) : m_weight_gamma(weight_gamma) {} + }; + + typedef basisu::hash_map > astc_block_grid_data_hash_t; + + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color32& l, color32& h); + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color32& l, color32& h, float* pScale = nullptr); + + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color_rgba& l, color_rgba& h); + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color_rgba& l, color_rgba& h, float* pScale = nullptr); + + void compute_adjoint_downsample_matrix(basisu::vector& downsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + void compute_upsample_matrix(basisu::vector2D& upsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + + class dct2f + { + enum { cMaxSize = 12 }; + + public: + dct2f() : m_rows(0u), m_cols(0u) {} + + // call with grid_height/grid_width (INVERTED) + bool init(uint32_t rows, uint32_t cols); + + uint32_t rows() const { return m_rows; } + uint32_t cols() const { return m_cols; } + + void forward(const float* pSrc, float* pDst, fvec& work) const; + + void inverse(const float* pSrc, float* pDst, fvec& work) const; + + // check variants use a less optimized implementation, used for sanity checking + void inverse_check(const float* pSrc, float* pDst, fvec& work) const; + + void forward(const float* pSrc, 
uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const; + + void inverse(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const; + + void inverse_check(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const; + + private: + uint32_t m_rows, m_cols; + fvec m_c_col; // [u*m_rows + x] + fvec m_c_row; // [v*m_cols + y] + fvec m_a_col; // alpha(u) + fvec m_a_row; // alpha(v) + }; + + struct dct_syms + { + dct_syms() + { + clear(); + } + + void clear() + { + m_dc_sym = 0; + m_num_dc_levels = 0; + m_coeffs.resize(0); + m_max_coeff_mag = 0; + m_max_zigzag_index = 0; + } + + uint32_t m_dc_sym; + uint32_t m_num_dc_levels; + + struct coeff + { + uint16_t m_num_zeros; + int16_t m_coeff; // or INT16_MAX if invalid + + coeff() {} + coeff(uint16_t num_zeros, int16_t coeff) : m_num_zeros(num_zeros), m_coeff(coeff) {} + }; + + basisu::static_vector m_coeffs; + + uint32_t m_max_coeff_mag; + uint32_t m_max_zigzag_index; + }; + + struct grid_dim_key + { + int m_grid_width; + int m_grid_height; + + grid_dim_key() {} + + grid_dim_key(int w, int h) : m_grid_width(w), m_grid_height(h) {} + + bool operator== (const grid_dim_key& rhs) const + { + return (m_grid_width == rhs.m_grid_width) && (m_grid_height == rhs.m_grid_height); + } + }; + + struct grid_dim_value + { + basisu::int_vec m_zigzag; + dct2f m_dct; + }; + + typedef basisu::hash_map > grid_dim_hash_map; + + void init_astc_block_grid_data_hash(); + + const astc_block_grid_data* find_astc_block_grid_data(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + + const float DEADZONE_ALPHA = .5f; + const float SCALED_WEIGHT_BASE_CODING_SCALE = .5f; // typically ~5 bits [0,32], or 3 [0,8] + + struct sample_quant_table_state + { + float m_q, m_sx, m_sy, m_level_scale; + + void init(float q, + uint32_t block_width, uint32_t block_height, + float level_scale) + { + m_q = q; + m_level_scale = level_scale; + + 
const int Bx = block_width, By = block_height; + + m_sx = (float)8.0f / (float)Bx; + m_sy = (float)8.0f / (float)By; + } + }; + + class grid_weight_dct + { + public: + grid_weight_dct() { } + + void init(uint32_t block_width, uint32_t block_height); + + static uint32_t get_num_weight_dc_levels(uint32_t weight_ise_range) + { + float scaled_weight_coding_scale = SCALED_WEIGHT_BASE_CODING_SCALE; + if (weight_ise_range <= astc_helpers::BISE_8_LEVELS) + scaled_weight_coding_scale = 1.0f / 8.0f; + + return (uint32_t)(64.0f * scaled_weight_coding_scale) + 1; + } + + struct block_stats + { + float m_mean_weight; + uint32_t m_total_coded_acs; + uint32_t m_max_ac_coeff; + }; + + bool decode_block_weights( + float q, uint32_t plane_index, // plane of weights to decode and IDCT from stream + astc_helpers::log_astc_block& log_blk, // must be initialized except for the plane weights which are decoded + basist::bitwise_decoder* pDec, + const astc_block_grid_data* pGrid_data, // grid data for this grid size + block_stats* pS, + fvec& dct_work, // thread local + const dct_syms* pSyms = nullptr) const; + + enum { m_zero_run = 3, m_coeff = 2 }; + + uint32_t m_block_width, m_block_height; + + grid_dim_hash_map m_grid_dim_key_vals; + + // Adaptively compensate for weight level quantization noise being fed into the DCT. + // The more coursely the weight levels are quantized, the more noise injected, and the more noise will be spread between multiple AC coefficients. + // This will cause some previously 0 coefficients to increase in mag, but they're likely noise. So carefully nudge the quant step size to compensate. 
+ static float scale_quant_steps(int Q_astc, float gamma = 0.1f /*.13f*/, float clamp_max = 2.0f) + { + assert(Q_astc >= 2); + float factor = 63.0f / (Q_astc - 1); + // TODO: Approximate powf() + float scaled = powf(factor, gamma); + scaled = basisu::clamp(scaled, 1.0f, clamp_max); + return scaled; + } + + float compute_level_scale(float q, float span_len, float weight_gamma, uint32_t grid_width, uint32_t grid_height, uint32_t weight_ise_range) const; + + int sample_quant_table(sample_quant_table_state& state, uint32_t x, uint32_t y) const; + + void compute_quant_table(float q, + uint32_t grid_width, uint32_t grid_height, + float level_scale, int* dct_quant_tab) const; + + float get_max_span_len(const astc_helpers::log_astc_block& log_blk, uint32_t plane_index) const; + + inline int quantize_deadzone(float d, int L, float alpha, uint32_t x, uint32_t y) const + { + assert((x < m_block_width) && (y < m_block_height)); + + if (((x == 1) && (y == 0)) || + ((x == 0) && (y == 1))) + { + return (int)std::round(d / (float)L); + } + + // L = quant step, alpha in [0,1.2] (typical 0.7–0.85) + if (L <= 0) + return 0; + + float s = fabsf(d); + float tau = alpha * float(L); // half-width of the zero band + + if (s <= tau) + return 0; // inside dead-zone towards zero + + // Quantize the residual outside the dead-zone with mid-tread rounding + float qf = (s - tau) / float(L); + int q = (int)floorf(qf + 0.5f); // ties-nearest + return (d < 0.0f) ? -q : q; + } + + inline float dequant_deadzone(int q, int L, float alpha, uint32_t x, uint32_t y) const + { + assert((x < m_block_width) && (y < m_block_height)); + + if (((x == 1) && (y == 0)) || + ((x == 0) && (y == 1))) + { + return (float)q * (float)L; + } + + if (q == 0 || L <= 0) + return 0.0f; + + float tau = alpha * float(L); + float mag = tau + float(abs(q)) * float(L); // center of the (nonzero) bin + return (q < 0) ? 
-mag : mag; + } + }; + + struct trial_mode + { + uint32_t m_grid_width; + uint32_t m_grid_height; + uint32_t m_cem; + int m_ccs_index; + uint32_t m_endpoint_ise_range; + uint32_t m_weight_ise_range; + uint32_t m_num_parts; + + bool operator==(const trial_mode& other) const + { +#define BU_COMP(a) if (a != other.a) return false; + BU_COMP(m_grid_width); + BU_COMP(m_grid_height); + BU_COMP(m_cem); + BU_COMP(m_ccs_index); + BU_COMP(m_endpoint_ise_range); + BU_COMP(m_weight_ise_range); + BU_COMP(m_num_parts); +#undef BU_COMP + return true; + } + + bool operator<(const trial_mode& rhs) const + { +#define BU_COMP(a) if (a < rhs.a) return true; else if (a > rhs.a) return false; + BU_COMP(m_grid_width); + BU_COMP(m_grid_height); + BU_COMP(m_cem); + BU_COMP(m_ccs_index); + BU_COMP(m_endpoint_ise_range); + BU_COMP(m_weight_ise_range); + BU_COMP(m_num_parts); +#undef BU_COMP + return false; + } + + operator size_t() const + { + size_t h = 0xABC1F419; +#define BU_FIELD(a) do { h ^= hash_hsieh(reinterpret_cast(&a), sizeof(a)); } while(0) + BU_FIELD(m_grid_width); + BU_FIELD(m_grid_height); + BU_FIELD(m_cem); + BU_FIELD(m_ccs_index); + BU_FIELD(m_endpoint_ise_range); + BU_FIELD(m_weight_ise_range); + BU_FIELD(m_num_parts); +#undef BU_FIELD + return h; + } + }; + + // Organize trial modes for faster initial mode triaging. + const uint32_t OTM_NUM_CEMS = 14; // 0-13 (13=highest valid LDR CEM) + const uint32_t OTM_NUM_SUBSETS = 3; // 1-3 + const uint32_t OTM_NUM_CCS = 5; // -1 to 3 + const uint32_t OTM_NUM_GRID_SIZES = 2; // 0=small or 1=large (grid_w>=block_w-1 and grid_h>=block_h-1) + const uint32_t OTM_NUM_GRID_ANISOS = 3; // 0=W=H, 1=W>H, 2=W 0) && (gh > 0)); + assert((bw > 0) && (bh > 0)); + assert((gw <= 12) && (gh <= 12) && (bw <= 12) && (bh <= 12)); + assert((gw <= bw) && (gh <= bh)); + +#if 0 + // Prev. 
code: + uint32_t grid_aniso = 0; + if (tm.m_grid_width != tm.m_grid_height) // not optimal for non-square block sizes + { + const float grid_x_fract = (float)tm.m_grid_width / (float)block_width; + const float grid_y_fract = (float)tm.m_grid_height / (float)block_height; + if (grid_x_fract >= grid_y_fract) + grid_aniso = 1; + else if (grid_x_fract < grid_y_fract) + grid_aniso = 2; + } +#endif + // Compare gw/bw vs. gh/bh using integer math: + // gw*bh >= gh*bw -> X-dominant (1), else Y-dominant (2) + const uint32_t lhs = gw * bh; + const uint32_t rhs = gh * bw; + + // Equal (isotropic), X=Y + if (lhs == rhs) + return 0; + + // Anisotropic - 1=X, 2=Y + return (lhs >= rhs) ? 1 : 2; + } + + struct grouped_trial_modes + { + basisu::uint_vec m_tm_groups[OTM_NUM_CEMS][OTM_NUM_SUBSETS][OTM_NUM_CCS][OTM_NUM_GRID_SIZES][OTM_NUM_GRID_ANISOS]; // indices of encoder trial modes in each bucket + + void clear() + { + for (uint32_t cem_iter = 0; cem_iter < OTM_NUM_CEMS; cem_iter++) + for (uint32_t subsets_iter = 0; subsets_iter < OTM_NUM_SUBSETS; subsets_iter++) + for (uint32_t ccs_iter = 0; ccs_iter < OTM_NUM_CCS; ccs_iter++) + for (uint32_t grid_sizes_iter = 0; grid_sizes_iter < OTM_NUM_GRID_SIZES; grid_sizes_iter++) + for (uint32_t grid_anisos_iter = 0; grid_anisos_iter < OTM_NUM_GRID_ANISOS; grid_anisos_iter++) + m_tm_groups[cem_iter][subsets_iter][ccs_iter][grid_sizes_iter][grid_anisos_iter].clear(); + } + + void add(uint32_t block_width, uint32_t block_height, + const trial_mode& tm, uint32_t tm_index) + { + const uint32_t cem_index = tm.m_cem; + assert(cem_index < OTM_NUM_CEMS); + + const uint32_t subset_index = tm.m_num_parts - 1; + assert(subset_index < OTM_NUM_SUBSETS); + + const uint32_t ccs_index = tm.m_ccs_index + 1; + assert(ccs_index < OTM_NUM_CCS); + + const uint32_t grid_size = (tm.m_grid_width >= (block_width - 1)) && (tm.m_grid_height >= (block_height - 1)); + const uint32_t grid_aniso = calc_grid_aniso_val(tm.m_grid_width, tm.m_grid_height, block_width, 
block_height); + + basisu::uint_vec& v = m_tm_groups[cem_index][subset_index][ccs_index][grid_size][grid_aniso]; + if (!v.capacity()) + v.reserve(64); + + v.push_back(tm_index); + } + + uint32_t count_used_groups() const + { + uint32_t n = 0; + + for (uint32_t cem_iter = 0; cem_iter < OTM_NUM_CEMS; cem_iter++) + for (uint32_t subsets_iter = 0; subsets_iter < OTM_NUM_SUBSETS; subsets_iter++) + for (uint32_t ccs_iter = 0; ccs_iter < OTM_NUM_CCS; ccs_iter++) + for (uint32_t grid_sizes_iter = 0; grid_sizes_iter < OTM_NUM_GRID_SIZES; grid_sizes_iter++) + for (uint32_t grid_anisos_iter = 0; grid_anisos_iter < OTM_NUM_GRID_ANISOS; grid_anisos_iter++) + { + if (m_tm_groups[cem_iter][subsets_iter][ccs_iter][grid_sizes_iter][grid_anisos_iter].size()) + n++; + } + return n; + } + }; + + extern grouped_trial_modes g_grouped_encoder_trial_modes[astc_helpers::cTOTAL_BLOCK_SIZES]; + + inline const basisu::uint_vec& get_tm_candidates(const grouped_trial_modes& grouped_enc_trial_modes, + uint32_t cem_index, uint32_t subset_index, uint32_t ccs_index, uint32_t grid_size, uint32_t grid_aniso) + { + assert(cem_index < OTM_NUM_CEMS); + assert(subset_index < OTM_NUM_SUBSETS); + assert(ccs_index < OTM_NUM_CCS); + assert(grid_size < OTM_NUM_GRID_SIZES); + assert(grid_aniso < OTM_NUM_GRID_ANISOS); + + const basisu::uint_vec& modes = grouped_enc_trial_modes.m_tm_groups[cem_index][subset_index][ccs_index][grid_size][grid_aniso]; + return modes; + } + + const uint32_t CFG_PACK_GRID_BITS = 7; + const uint32_t CFG_PACK_CEM_BITS = 3; + const uint32_t CFG_PACK_CCS_BITS = 3; + const uint32_t CFG_PACK_SUBSETS_BITS = 2; + const uint32_t CFG_PACK_WISE_BITS = 4; + const uint32_t CFG_PACK_EISE_BITS = 5; + + extern const int s_unique_ldr_index_to_astc_cem[6]; + + enum class xuastc_mode + { + cMODE_SOLID = 0, + cMODE_RAW = 1, + + // Full cfg, partition ID, and all endpoint value reuse. 
+ cMODE_REUSE_CFG_ENDPOINTS_LEFT = 2, + cMODE_REUSE_CFG_ENDPOINTS_UP = 3, + cMODE_REUSE_CFG_ENDPOINTS_DIAG = 4, + + cMODE_RUN = 5, + + cMODE_TOTAL, + }; + + enum class xuastc_zstd_mode + { + // len=1 bits + cMODE_RAW = 0b0, + + // len=2 bits + cMODE_RUN = 0b01, + + // len=4 bits + cMODE_SOLID = 0b0011, + cMODE_REUSE_CFG_ENDPOINTS_LEFT = 0b0111, + cMODE_REUSE_CFG_ENDPOINTS_UP = 0b1011, + cMODE_REUSE_CFG_ENDPOINTS_DIAG = 0b1111 + }; + + const uint32_t XUASTC_LDR_MODE_BYTE_IS_BASE_OFS_FLAG = 1 << 3; + const uint32_t XUASTC_LDR_MODE_BYTE_PART_HASH_HIT = 1 << 4; + const uint32_t XUASTC_LDR_MODE_BYTE_DPCM_ENDPOINTS_FLAG = 1 << 5; + const uint32_t XUASTC_LDR_MODE_BYTE_TM_HASH_HIT_FLAG = 1 << 6; + const uint32_t XUASTC_LDR_MODE_BYTE_USE_DCT = 1 << 7; + + enum class xuastc_ldr_syntax + { + cFullArith = 0, + cHybridArithZStd = 1, + cFullZStd = 2, + + cTotal + }; + + void create_encoder_trial_modes_table(uint32_t block_width, uint32_t block_height, + basisu::vector& encoder_trial_modes, grouped_trial_modes& grouped_encoder_trial_modes, + bool print_debug_info, bool print_modes); + + extern basisu::vector g_encoder_trial_modes[astc_helpers::cTOTAL_BLOCK_SIZES]; + + inline uint32_t part_hash_index(uint32_t x) + { + // fib hash + return (x * 2654435769u) & (PART_HASH_SIZE - 1); + } + + // Full ZStd syntax only + inline uint32_t tm_hash_index(uint32_t x) + { + // fib hash + return (x * 2654435769u) & (TM_HASH_SIZE - 1); + } + + // TODO: Some fields are unused during transcoding. 
+ struct prev_block_state + { + bool m_was_solid_color; + bool m_used_weight_dct; + bool m_first_endpoint_uses_bc; + bool m_reused_full_cfg; + bool m_used_part_hash; + + int m_tm_index; // -1 if invalid (solid color block) + uint32_t m_base_cem_index; // doesn't include base+ofs + uint32_t m_subset_index, m_ccs_index, m_grid_size, m_grid_aniso; + + prev_block_state() + { + clear(); + } + + void clear() + { + basisu::clear_obj(*this); + } + }; + + struct prev_block_state_full_zstd + { + int m_tm_index; // -1 if invalid (solid color block) + + bool was_solid_color() const { return m_tm_index < 0; } + + prev_block_state_full_zstd() + { + clear(); + } + + void clear() + { + basisu::clear_obj(*this); + } + }; + + inline uint32_t cem_to_ldrcem_index(uint32_t cem) + { + switch (cem) + { + case astc_helpers::CEM_LDR_LUM_DIRECT: return 0; + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: return 1; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE: return 2; + case astc_helpers::CEM_LDR_RGB_DIRECT: return 3; + case astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET: return 4; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: return 5; + case astc_helpers::CEM_LDR_RGBA_DIRECT: return 6; + case astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET: return 7; + default: + assert(0); + break; + } + + return 0; + } + + bool pack_base_offset( + uint32_t cem_index, uint32_t dst_ise_endpoint_range, uint8_t* pPacked_endpoints, + const color_rgba& l, const color_rgba& h, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag, bool& endpoints_swapped); + + bool convert_endpoints_across_cems( + uint32_t prev_cem, uint32_t prev_endpoint_ise_range, const uint8_t* pPrev_endpoints, + uint32_t dst_cem, uint32_t dst_endpoint_ise_range, uint8_t* pDst_endpoints, + bool always_repack, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag); + 
+ uint32_t get_total_unique_patterns(uint32_t astc_block_size_index, uint32_t num_parts); + //uint16_t unique_pat_index_to_part_seed(uint32_t astc_block_size_index, uint32_t num_parts, uint32_t unique_pat_index); + + typedef bool (*xuastc_decomp_image_init_callback_ptr)(uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData); + typedef bool (*xuastc_decomp_image_block_callback_ptr)(uint32_t bx, uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData); + + bool xuastc_ldr_decompress_image( + const uint8_t* pComp_data, size_t comp_data_size, + uint32_t& astc_block_width, uint32_t& astc_block_height, + uint32_t& actual_width, uint32_t& actual_height, bool& has_alpha, bool& uses_srgb_astc_decode_mode, + bool debug_output, + xuastc_decomp_image_init_callback_ptr pInit_callback, void *pInit_callback_data, + xuastc_decomp_image_block_callback_ptr pBlock_callback, void *pBlock_callback_data); + + } // namespace astc_ldr_t + + namespace arith_fastbits_f32 + { + enum { TABLE_BITS = 8 }; // 256..1024 entries typical (8..10) + enum { TABLE_SIZE = 1 << TABLE_BITS }; + enum { MANT_BITS = 23 }; + enum { FRAC_BITS = MANT_BITS - TABLE_BITS }; + enum { FRAC_MASK = (1u << FRAC_BITS) - 1u }; + + extern bool g_initialized; + extern float g_lut_edge[TABLE_SIZE + 1]; // samples at m = 1 + i/TABLE_SIZE (for linear) + + inline void init() + { + if (g_initialized) + return; + + const float inv_ln2 = 1.4426950408889634f; // 1/ln(2) + + for (int i = 0; i <= TABLE_SIZE; ++i) + { + float m = 1.0f + float(i) / float(TABLE_SIZE); // m in [1,2] + g_lut_edge[i] = logf(m) * inv_ln2; // log2(m) + } + + g_initialized = true; + } + + inline void unpack(float p, int& e_unbiased, uint32_t& mant) + { + // kill any denorms + if (p < FLT_MIN) + p = 0; + + union { float f; uint32_t u; } x; + x.f = p; + e_unbiased = int((x.u >> 23) & 0xFF) - 127; + mant = (x.u & 0x7FFFFFu); // 23-bit mantissa + 
} + + // Returns estimated bits given probability p, approximates -log2f(p). + inline float bits_from_prob_linear(float p) + { + assert((p > 0.0f) && (p <= 1.0f)); + if (!g_initialized) + init(); + + int e; uint32_t mant; + unpack(p, e, mant); + + uint32_t idx = mant >> FRAC_BITS; // 0..TABLE_SIZE-1 + uint32_t frac = mant & FRAC_MASK; // low FRAC_BITS + const float inv_scale = 1.0f / float(1u << FRAC_BITS); + float t = float(frac) * inv_scale; // [0,1) + + float y0 = g_lut_edge[idx]; + float y1 = g_lut_edge[idx + 1]; + float log2m = y0 + t * (y1 - y0); + + return -(float(e) + log2m); + } + + } // namespace arith_fastbits_f32 + + namespace arith + { + // A simple range coder + const uint32_t ArithMaxSyms = 2048; + const uint32_t DMLenShift = 15u; + const uint32_t DMMaxCount = 1u << DMLenShift; + const uint32_t BMLenShift = 13u; + const uint32_t BMMaxCount = 1u << BMLenShift; + const uint32_t ArithMinLen = 1u << 24u; + const uint32_t ArithMaxLen = UINT32_MAX; + const uint32_t ArithMinExpectedDataBufSize = 5; + + class arith_bit_model + { + public: + arith_bit_model() + { + reset(); + } + + void init() + { + reset(); + } + + void reset() + { + m_bit0_count = 1; + m_bit_count = 2; + m_bit0_prob = 1U << (BMLenShift - 1); + m_update_interval = 4; + m_bits_until_update = 4; + } + + float get_price(bool bit) const + { + const float prob_0 = (float)m_bit0_prob / (float)BMMaxCount; + const float prob = bit ? 
(1.0f - prob_0) : prob_0; + const float bits = arith_fastbits_f32::bits_from_prob_linear(prob); + assert(fabs(bits - (-log2f(prob))) < .00125f); // basic sanity check + return bits; + } + + void update() + { + assert(m_bit_count >= 2); + assert(m_bit0_count < m_bit_count); + + if (m_bit_count >= BMMaxCount) + { + assert(m_bit_count && m_bit0_count); + + m_bit_count = (m_bit_count + 1) >> 1; + m_bit0_count = (m_bit0_count + 1) >> 1; + + if (m_bit0_count == m_bit_count) + ++m_bit_count; + + assert(m_bit0_count < m_bit_count); + } + + const uint32_t scale = 0x80000000U / m_bit_count; + m_bit0_prob = (m_bit0_count * scale) >> (31 - BMLenShift); + + m_update_interval = basisu::clamp((5 * m_update_interval) >> 2, 4u, 128); + + m_bits_until_update = m_update_interval; + } + + void print_prices(const char* pDesc) + { + if (pDesc) + printf("arith_data_model bit prices for model %s:\n", pDesc); + for (uint32_t i = 0; i < 2; i++) + printf("%u: %3.3f bits\n", i, get_price(i)); + printf("\n"); + } + + private: + friend class arith_enc; + friend class arith_dec; + + uint32_t m_bit0_prob; // snapshot made at last update + + uint32_t m_bit0_count; // live + uint32_t m_bit_count; // live + + int m_bits_until_update; + uint32_t m_update_interval; + }; + + enum { cARITH_GAMMA_MAX_TAIL_CTX = 4, cARITH_GAMMA_MAX_PREFIX_CTX = 3 }; + struct arith_gamma_contexts + { + arith_bit_model m_ctx_prefix[cARITH_GAMMA_MAX_PREFIX_CTX]; // for unary continue prefix + arith_bit_model m_ctx_tail[cARITH_GAMMA_MAX_TAIL_CTX]; // for binary suffix bits + }; + + class arith_data_model + { + public: + arith_data_model() : + m_num_data_syms(0), + m_total_sym_freq(0), + m_update_interval(0), + m_num_syms_until_next_update(0) + { + } + + arith_data_model(uint32_t num_syms, bool faster_update = false) : + m_num_data_syms(0), + m_total_sym_freq(0), + m_update_interval(0), + m_num_syms_until_next_update(0) + { + init(num_syms, faster_update); + } + + void clear() + { + m_cum_sym_freqs.clear(); + 
m_sym_freqs.clear(); + + m_num_data_syms = 0; + m_total_sym_freq = 0; + m_update_interval = 0; + m_num_syms_until_next_update = 0; + } + + void init(uint32_t num_syms, bool faster_update = false) + { + assert((num_syms >= 2) && (num_syms <= ArithMaxSyms)); + + m_num_data_syms = num_syms; + + m_sym_freqs.resize(num_syms); + m_cum_sym_freqs.resize(num_syms + 1); + + reset(faster_update); + } + + void reset(bool faster_update = false) + { + if (!m_num_data_syms) + return; + + m_sym_freqs.set_all(1); + m_total_sym_freq = m_num_data_syms; + + m_update_interval = m_num_data_syms; + m_num_syms_until_next_update = 0; + + update(false); + + if (faster_update) + { + m_update_interval = basisu::clamp((m_num_data_syms + 7) / 8, 4u, (m_num_data_syms + 6) << 3); + m_num_syms_until_next_update = m_update_interval; + } + } + + void update(bool enc_flag) + { + assert(m_num_data_syms); + BASISU_NOTE_UNUSED(enc_flag); + + if (!m_num_data_syms) + return; + + while (m_total_sym_freq >= DMMaxCount) + { + m_total_sym_freq = 0; + + for (uint32_t n = 0; n < m_num_data_syms; n++) + { + m_sym_freqs[n] = (m_sym_freqs[n] + 1u) >> 1u; + m_total_sym_freq += m_sym_freqs[n]; + } + } + + const uint32_t scale = 0x80000000U / m_total_sym_freq; + + uint32_t sum = 0; + for (uint32_t i = 0; i < m_num_data_syms; ++i) + { + assert(((uint64_t)scale * sum) <= UINT32_MAX); + m_cum_sym_freqs[i] = (scale * sum) >> (31 - DMLenShift); + sum += m_sym_freqs[i]; + } + assert(sum == m_total_sym_freq); + + m_cum_sym_freqs[m_num_data_syms] = DMMaxCount; + + m_update_interval = basisu::clamp((5 * m_update_interval) >> 2, 4u, (m_num_data_syms + 6) << 3); + + m_num_syms_until_next_update = m_update_interval; + } + + float get_price(uint32_t sym_index) const + { + assert(sym_index < m_num_data_syms); + + if (sym_index >= m_num_data_syms) + return 0.0f; + + const float prob = (float)(m_cum_sym_freqs[sym_index + 1] - m_cum_sym_freqs[sym_index]) / (float)DMMaxCount; + + const float bits = 
arith_fastbits_f32::bits_from_prob_linear(prob); + assert(fabs(bits - (-log2f(prob))) < .00125f); // basic sanity check + return bits; + } + + void print_prices(const char* pDesc) + { + if (pDesc) + printf("arith_data_model bit prices for model %s:\n", pDesc); + for (uint32_t i = 0; i < m_num_data_syms; i++) + printf("%u: %3.3f bits\n", i, get_price(i)); + printf("\n"); + } + + uint32_t get_num_data_syms() const { return m_num_data_syms; } + + private: + friend class arith_enc; + friend class arith_dec; + + uint32_t m_num_data_syms; + + basisu::uint_vec m_sym_freqs; // live histogram + uint32_t m_total_sym_freq; // always live vs. m_sym_freqs + + basisu::uint_vec m_cum_sym_freqs; // has 1 extra entry, snapshot from last update + + uint32_t m_update_interval; + int m_num_syms_until_next_update; + + uint32_t get_last_sym_index() const { return m_num_data_syms - 1; } + }; + + class arith_enc + { + public: + arith_enc() + { + clear(); + } + + void clear() + { + m_data_buf.clear(); + + m_base = 0; + m_length = ArithMaxLen; + } + + void init(size_t reserve_size) + { + m_data_buf.reserve(reserve_size); + m_data_buf.resize(0); + + m_base = 0; + m_length = ArithMaxLen; + + // Place 8-bit marker at beginning. + // This virtually always guarantees no backwards carries can be lost at the very beginning of the stream. (Should be impossible with this design.) + // It always pushes out 1 0 byte at the very beginning to absorb future carries. 
+ // Caller does this now, we send a tiny header anyway + //put_bits(0x1, 8); + //assert(m_data_buf[0] != 0xFF); + } + + void put_bit(uint32_t bit) + { + m_length >>= 1; + + if (bit) + { + const uint32_t orig_base = m_base; + + m_base += m_length; + + if (orig_base > m_base) + prop_carry(); + } + + if (m_length < ArithMinLen) + renorm(); + } + + enum { cMaxPutBitsLen = 20 }; + void put_bits(uint32_t val, uint32_t num_bits) + { + assert(num_bits && (num_bits <= cMaxPutBitsLen)); + assert(val < (1u << num_bits)); + + m_length >>= num_bits; + + const uint32_t orig_base = m_base; + + m_base += val * m_length; + + if (orig_base > m_base) + prop_carry(); + + if (m_length < ArithMinLen) + renorm(); + } + + // returns # of bits actually written + inline uint32_t put_truncated_binary(uint32_t v, uint32_t n) + { + assert((n >= 2) && (v < n)); + + uint32_t k = basisu::floor_log2i(n); + uint32_t u = (1 << (k + 1)) - n; + + if (v < u) + { + put_bits(v, k); + return k; + } + + uint32_t x = v + u; + assert((x >> 1) >= u); + + put_bits(x >> 1, k); + put_bits(x & 1, 1); + return k + 1; + } + + static inline uint32_t get_truncated_binary_bits(uint32_t v, uint32_t n) + { + assert((n >= 2) && (v < n)); + + uint32_t k = basisu::floor_log2i(n); + uint32_t u = (1 << (k + 1)) - n; + + if (v < u) + return k; + +#ifdef _DEBUG + uint32_t x = v + u; + assert((x >> 1) >= u); +#endif + + return k + 1; + } + + inline uint32_t put_rice(uint32_t v, uint32_t m) + { + assert(m); + + uint32_t q = v >> m, r = v & ((1 << m) - 1); + + // rice coding sanity check + assert(q <= 64); + + uint32_t total_bits = q; + + // TODO: put_bits the pattern inverted in bit order + while (q) + { + put_bit(1); + q--; + } + + put_bit(0); + + put_bits(r, m); + + total_bits += (m + 1); + + return total_bits; + } + + static inline uint32_t get_rice_price(uint32_t v, uint32_t m) + { + assert(m); + + uint32_t q = v >> m; + + // rice coding sanity check + assert(q <= 64); + + uint32_t total_bits = q + 1 + m; + + return 
total_bits; + } + + inline void put_gamma(uint32_t n, arith_gamma_contexts& ctxs) + { + assert(n); + if (!n) + return; + + const int k = basisu::floor_log2i(n); + if (k > 16) + { + assert(0); + return; + } + + // prefix: k times '1' then a '0' + for (int i = 0; i < k; ++i) + encode(1, ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + + encode(0, ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + + // suffix: the k low bits of n + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = (n >> i) & 1u; + encode(bit, ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)]); + } + } + + inline float put_gamma_and_return_price(uint32_t n, arith_gamma_contexts& ctxs) + { + assert(n); + if (!n) + return 0.0f; + + const int k = basisu::floor_log2i(n); + if (k > 16) + { + assert(0); + return 0.0f; + } + + float total_price = 0.0f; + + // prefix: k times '1' then a '0' + for (int i = 0; i < k; ++i) + { + total_price += ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(1); + encode(1, ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + } + + total_price += ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(0); + encode(0, ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + + // suffix: the k low bits of n + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = (n >> i) & 1u; + total_price += ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)].get_price(bit); + encode(bit, ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)]); + } + + return total_price; + } + + // prediced price, won't be accurate if a binary arith model decides to update in between + inline float get_gamma_price(uint32_t n, const arith_gamma_contexts& ctxs) + { + assert(n); + if (!n) + return 0.0f; + + const int k = basisu::floor_log2i(n); + if (k > 16) + { + assert(0); + return 0.0f; + } + + float total_price = 0.0f; 
+ + // prefix: k times '1' then a '0' + for (int i = 0; i < k; ++i) + total_price += ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(1); + + total_price += ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(0); + + // suffix: the k low bits of n + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = (n >> i) & 1u; + total_price += ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)].get_price(bit); + } + + return total_price; + } + + void encode(uint32_t bit, arith_bit_model& dm) + { + uint32_t x = dm.m_bit0_prob * (m_length >> BMLenShift); + + if (!bit) + { + m_length = x; + ++dm.m_bit0_count; + } + else + { + const uint32_t orig_base = m_base; + m_base += x; + m_length -= x; + + if (orig_base > m_base) + prop_carry(); + } + ++dm.m_bit_count; + + if (m_length < ArithMinLen) + renorm(); + + if (--dm.m_bits_until_update <= 0) + dm.update(); + } + + float encode_and_return_price(uint32_t bit, arith_bit_model& dm) + { + const float price = dm.get_price(bit); + encode(bit, dm); + return price; + } + + void encode(uint32_t sym, arith_data_model& dm) + { + assert(sym < dm.m_num_data_syms); + + const uint32_t orig_base = m_base; + + if (sym == dm.get_last_sym_index()) + { + uint32_t x = dm.m_cum_sym_freqs[sym] * (m_length >> DMLenShift); + m_base += x; + m_length -= x; + } + else + { + m_length >>= DMLenShift; + uint32_t x = dm.m_cum_sym_freqs[sym] * m_length; + m_base += x; + m_length = dm.m_cum_sym_freqs[sym + 1] * m_length - x; + } + + if (orig_base > m_base) + prop_carry(); + + if (m_length < ArithMinLen) + renorm(); + + ++dm.m_sym_freqs[sym]; + ++dm.m_total_sym_freq; + + if (--dm.m_num_syms_until_next_update <= 0) + dm.update(true); + } + + float encode_and_return_price(uint32_t sym, arith_data_model& dm) + { + const float price = dm.get_price(sym); + encode(sym, dm); + return price; + } + + void flush() + { + const uint32_t orig_base = m_base; + + if (m_length <= (2 * ArithMinLen)) + { + 
m_base += ArithMinLen >> 1; + m_length = ArithMinLen >> 9; + } + else + { + m_base += ArithMinLen; + m_length = ArithMinLen >> 1; + } + + if (orig_base > m_base) + prop_carry(); + + renorm(); + + // Pad output to min 5 bytes - quite conservative; we're typically compressing large streams so the overhead shouldn't matter. + if (m_data_buf.size() < ArithMinExpectedDataBufSize) + m_data_buf.resize(ArithMinExpectedDataBufSize); + } + + basisu::uint8_vec& get_data_buf() { return m_data_buf; } + const basisu::uint8_vec& get_data_buf() const { return m_data_buf; } + + private: + basisu::uint8_vec m_data_buf; + uint32_t m_base, m_length; + + inline void prop_carry() + { + int64_t ofs = m_data_buf.size() - 1; + + for (; (ofs >= 0) && (m_data_buf[(size_t)ofs] == 0xFF); --ofs) + m_data_buf[(size_t)ofs] = 0; + + if (ofs >= 0) + ++m_data_buf[(size_t)ofs]; + } + + inline void renorm() + { + assert(m_length < ArithMinLen); + do + { + m_data_buf.push_back((uint8_t)(m_base >> 24u)); + m_base <<= 8u; + m_length <<= 8u; + } while (m_length < ArithMinLen); + } + }; + + class arith_dec + { + public: + arith_dec() + { + clear(); + } + + void clear() + { + m_pData_buf = nullptr; + m_pData_buf_last_byte = nullptr; + m_pData_buf_cur = nullptr; + m_data_buf_size = 0; + + m_value = 0; + m_length = 0; + } + + bool init(const uint8_t* pBuf, size_t buf_size) + { + if (buf_size < ArithMinExpectedDataBufSize) + { + assert(0); + return false; + } + + m_pData_buf = pBuf; + m_pData_buf_last_byte = pBuf + buf_size - 1; + m_pData_buf_cur = m_pData_buf + 4; + m_data_buf_size = buf_size; + + m_value = ((uint32_t)(pBuf[0]) << 24u) | ((uint32_t)(pBuf[1]) << 16u) | ((uint32_t)(pBuf[2]) << 8u) | (uint32_t)(pBuf[3]); + m_length = ArithMaxLen; + + // Check for the 8-bit marker we always place at the beginning of the stream. 
+ //uint32_t marker = get_bits(8); + //if (marker != 0x1) + // return false; + + return true; + } + + uint32_t get_bit() + { + assert(m_data_buf_size); + + m_length >>= 1; + + uint32_t bit = (m_value >= m_length); + + if (bit) + m_value -= m_length; + + if (m_length < ArithMinLen) + renorm(); + + return bit; + } + + enum { cMaxGetBitsLen = 20 }; + + uint32_t get_bits(uint32_t num_bits) + { + assert(m_data_buf_size); + + if ((num_bits < 1) || (num_bits > cMaxGetBitsLen)) + { + assert(0); + return 0; + } + + m_length >>= num_bits; + assert(m_length); + + const uint32_t v = m_value / m_length; + + m_value -= m_length * v; + + if (m_length < ArithMinLen) + renorm(); + + return v; + } + + uint32_t decode_truncated_binary(uint32_t n) + { + assert(n >= 2); + + const uint32_t k = basisu::floor_log2i(n); + const uint32_t u = (1 << (k + 1)) - n; + + uint32_t result = get_bits(k); + + if (result >= u) + result = ((result << 1) | get_bits(1)) - u; + + return result; + } + + uint32_t decode_rice(uint32_t m) + { + assert(m); + + uint32_t q = 0; + for (;;) + { + uint32_t k = get_bit(); + if (!k) + break; + + q++; + if (q > 64) + { + assert(0); + return 0; + } + } + + return (q << m) + get_bits(m); + } + + uint32_t decode_bit(arith_bit_model& dm) + { + assert(m_data_buf_size); + + uint32_t x = dm.m_bit0_prob * (m_length >> BMLenShift); + uint32_t bit = (m_value >= x); + + if (bit == 0) + { + m_length = x; + ++dm.m_bit0_count; + } + else + { + m_value -= x; + m_length -= x; + } + ++dm.m_bit_count; + + if (m_length < ArithMinLen) + renorm(); + + if (--dm.m_bits_until_update <= 0) + dm.update(); + + return bit; + } + + inline uint32_t decode_gamma(arith_gamma_contexts& ctxs) + { + int k = 0; + while (decode_bit(ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)])) + { + ++k; + + if (k > 16) + { + // something is very wrong + assert(0); + return 0; + } + } + + int n = 1 << k; + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = 
decode_bit(ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)]); + n |= (bit << i); + } + + return n; + } + + uint32_t decode_sym(arith_data_model& dm) + { + assert(m_data_buf_size); + assert(dm.m_num_data_syms); + + uint32_t x = 0, y = m_length; + + m_length >>= DMLenShift; + + uint32_t low_idx = 0, hi_idx = dm.m_num_data_syms; + uint32_t mid_idx = hi_idx >> 1; + + do + { + uint32_t z = m_length * dm.m_cum_sym_freqs[mid_idx]; + + if (z > m_value) + { + hi_idx = mid_idx; + y = z; + } + else + { + low_idx = mid_idx; + x = z; + } + mid_idx = (low_idx + hi_idx) >> 1; + + } while (mid_idx != low_idx); + + m_value -= x; + m_length = y - x; + + if (m_length < ArithMinLen) + renorm(); + + ++dm.m_sym_freqs[low_idx]; + ++dm.m_total_sym_freq; + + if (--dm.m_num_syms_until_next_update <= 0) + dm.update(false); + + return low_idx; + } + + private: + const uint8_t* m_pData_buf; + const uint8_t* m_pData_buf_last_byte; + const uint8_t* m_pData_buf_cur; + size_t m_data_buf_size; + + uint32_t m_value, m_length; + + inline void renorm() + { + do + { + const uint32_t next_byte = (m_pData_buf_cur > m_pData_buf_last_byte) ? 0 : *m_pData_buf_cur++; + + m_value = (m_value << 8u) | next_byte; + + } while ((m_length <<= 8u) < ArithMinLen); + } + }; + + } // namespace arith +#endif // BASISD_SUPPORT_XUASTC + +#if BASISD_SUPPORT_XUASTC + namespace bc7u + { + int determine_bc7_mode(const void* pBlock); + int determine_bc7_mode_4_index_mode(const void* pBlock); + int determine_bc7_mode_4_or_5_rotation(const void* pBlock); + bool unpack_bc7_mode6(const void* pBlock_bits, color_rgba* pPixels); + bool unpack_bc7(const void* pBlock, color_rgba* pPixels); + } // namespace bc7u + + namespace bc7f + { + enum + { + // Low-level BC7 encoder configuration flags. 
+ cPackBC7FlagUse2SubsetsRGB = 1, // use mode 1/3 for RGB blocks + cPackBC7FlagUse2SubsetsRGBA = 2, // use mode 7 for RGBA blocks + + cPackBC7FlagUse3SubsetsRGB = 4, // also use mode 0/2, cPackBC7FlagUse2SubsetsRGB MUST be enabled too + + cPackBC7FlagUseDualPlaneRGB = 8, // enable mode 4/5 usage for RGB blocks + cPackBC7FlagUseDualPlaneRGBA = 16, // enable mode 4/5 usage for RGBA blocks + + cPackBC7FlagPBitOpt = 32, // enable to disable usage of fixed p-bits on some modes; slower + cPackBC7FlagPBitOptMode6 = 64, // enable to disable usage of fixed p-bits on mode 6, alpha on fully opaque blocks may be 254 however; slower + + cPackBC7FlagUseTrivialMode6 = 128, // enable trivial fast mode 6 encoder on blocks with very low variances (highly recommended) + + cPackBC7FlagPartiallyAnalyticalRGB = 256, // partially analytical mode for RGB blocks, slower but higher quality, computes actual SSE's on complex blocks to resolve which mode to use vs. predictions + cPackBC7FlagPartiallyAnalyticalRGBA = 512, // partially analytical mode for RGBA blocks, slower but higher quality, computes actual SSE's on complex blocks to resolve which mode to use vs. predictions + + // Non-analytical is really still partially analytical on the mode pairs (0 vs. 2, 1 vs 3, 4 vs. 5). + cPackBC7FlagNonAnalyticalRGB = 1024, // very slow/brute force, totally abuses the encoder, MUST use with cPackBC7FlagPartiallyAnalyticalRGB flag + cPackBC7FlagNonAnalyticalRGBA = 2048, // very slow/brute force, totally abuses the encoder, MUST use with cPackBC7FlagPartiallyAnalyticalRGBA flag + + // Default to use first: + + // Decent analytical BC7 defaults + cPackBC7FlagDefaultFastest = cPackBC7FlagUseTrivialMode6, // very weak particularly on alpha, mode 6 only for RGB/RGBA, + + // Mode 6 with pbits for RGB, Modes 4,5,6 for alpha. 
+ cPackBC7FlagDefaultFaster = cPackBC7FlagPBitOpt | cPackBC7FlagUseDualPlaneRGBA | cPackBC7FlagUseTrivialMode6, + + cPackBC7FlagDefaultFast = cPackBC7FlagUse2SubsetsRGB | cPackBC7FlagUse2SubsetsRGBA | cPackBC7FlagUseDualPlaneRGBA | + cPackBC7FlagPBitOpt | cPackBC7FlagUseTrivialMode6, + + cPackBC7FlagDefault = (cPackBC7FlagUse2SubsetsRGB | cPackBC7FlagUse2SubsetsRGBA | cPackBC7FlagUse3SubsetsRGB) | + (cPackBC7FlagUseDualPlaneRGB | cPackBC7FlagUseDualPlaneRGBA) | + (cPackBC7FlagPBitOpt | cPackBC7FlagPBitOptMode6) | + cPackBC7FlagUseTrivialMode6, + + // Default partially analytical BC7 defaults (slower) + cPackBC7FlagDefaultPartiallyAnalytical = cPackBC7FlagDefault | (cPackBC7FlagPartiallyAnalyticalRGB | cPackBC7FlagPartiallyAnalyticalRGBA), + + // Default non-analytical BC7 defaults (very slow). In reality the encoder is still analytical on the mode pairs, but at the highest level is non-analytical. + cPackBC7FlagDefaultNonAnalytical = (cPackBC7FlagDefaultPartiallyAnalytical | (cPackBC7FlagNonAnalyticalRGB | cPackBC7FlagNonAnalyticalRGBA)) & ~cPackBC7FlagUseTrivialMode6 + }; + + void init(); + + void fast_pack_bc7_rgb_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + uint32_t fast_pack_bc7_rgb_partial_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + + void fast_pack_bc7_rgba_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + uint32_t fast_pack_bc7_rgba_partial_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + + uint32_t fast_pack_bc7_auto_rgba(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + + void print_perf_stats(); + +#if 0 + // Very basic BC7 mode 6 only to ASTC. 
+ void fast_pack_astc(void* pBlock, const color_rgba* pPixels); +#endif + + uint32_t calc_sse(const uint8_t* pBlock, const color_rgba* pPixels); + + } // namespace bc7f + + namespace etc1f + { + struct pack_etc1_state + { + uint64_t m_prev_solid_block; + //decoder_etc_block m_prev_solid_block; + + int m_prev_solid_r8; + int m_prev_solid_g8; + int m_prev_solid_b8; + + pack_etc1_state() + { + clear(); + } + + void clear() + { + m_prev_solid_r8 = -1; + m_prev_solid_g8 = -1; + m_prev_solid_b8 = -1; + } + }; + + void init(); + + void pack_etc1_solid(uint8_t* pBlock, const color_rgba& color, pack_etc1_state& state, bool init_flag = false); + + void pack_etc1(uint8_t* pBlock, const color_rgba* pPixels, pack_etc1_state& state); + + void pack_etc1_grayscale(uint8_t* pBlock, const uint8_t* pPixels, pack_etc1_state& state); + + } // namespace etc1f +#endif // BASISD_SUPPORT_XUASTC + + // Private/internal XUASTC LDR transcoding helpers + + // XUASTC LDR formats only + enum class transcoder_texture_format; + block_format xuastc_get_block_format(transcoder_texture_format tex_fmt); + +#if BASISD_SUPPORT_XUASTC + // Low-quality, but fast, PVRTC1 RGB/RGBA encoder. Power of 2 texture dimensions required. + // Note: Not yet part of our public API: this API may change! 
+ void encode_pvrtc1( + block_format fmt, void* pDst_blocks, + const basisu::vector2D& temp_image, + uint32_t dst_num_blocks_x, uint32_t dst_num_blocks_y, bool from_alpha); + + void transcode_4x4_block( + block_format fmt, // desired output block format + uint32_t block_x, uint32_t block_y, // 4x4 block being processed + void* pDst_blocks, // base pointer to output buffer/bitmap + uint8_t* pDst_block_u8, // pointer to output block/or first pixel to write + const color32* block_pixels, // pointer to 4x4 (16) 32bpp RGBA pixels + uint32_t output_block_or_pixel_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, // output buffer dimensions + int channel0, int channel1, // channels to process, used by some block formats + bool high_quality, bool from_alpha, // Flags specific to certain block formats + uint32_t bc7f_flags, // Real-time bc7f BC7 encoder flags, see bc7f::cPackBC7FlagDefault etc. + etc1f::pack_etc1_state& etc1_pack_state, // etc1f thread local state + int has_alpha = -1); // has_alpha = -1 unknown, 0=definitely no (a all 255's), 1=potentially yes +#endif // BASISD_SUPPORT_XUASTC + + struct bc7_mode_5 + { + union + { + struct + { + uint64_t m_mode : 6; + uint64_t m_rot : 2; + + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 8; + uint64_t m_a1_0 : 6; + + } m_lo; + + uint64_t m_lo_bits; + }; + + union + { + struct + { + uint64_t m_a1_1 : 2; + + // bit 2 + uint64_t m_c00 : 1; + uint64_t m_c10 : 2; + uint64_t m_c20 : 2; + uint64_t m_c30 : 2; + + uint64_t m_c01 : 2; + uint64_t m_c11 : 2; + uint64_t m_c21 : 2; + uint64_t m_c31 : 2; + + uint64_t m_c02 : 2; + uint64_t m_c12 : 2; + uint64_t m_c22 : 2; + uint64_t m_c32 : 2; + + uint64_t m_c03 : 2; + uint64_t m_c13 : 2; + uint64_t m_c23 : 2; + uint64_t m_c33 : 2; + + // bit 33 + uint64_t m_a00 : 1; + uint64_t m_a10 : 2; + uint64_t m_a20 : 2; + uint64_t m_a30 : 2; + + uint64_t m_a01 
: 2; + uint64_t m_a11 : 2; + uint64_t m_a21 : 2; + uint64_t m_a31 : 2; + + uint64_t m_a02 : 2; + uint64_t m_a12 : 2; + uint64_t m_a22 : 2; + uint64_t m_a32 : 2; + + uint64_t m_a03 : 2; + uint64_t m_a13 : 2; + uint64_t m_a23 : 2; + uint64_t m_a33 : 2; + + } m_hi; + + uint64_t m_hi_bits; + }; + }; + +} // namespace basist + + + diff --git a/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder_uastc.h b/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder_uastc.h new file mode 100644 index 000000000..c1d01864f --- /dev/null +++ b/Source/ThirdParty/basis_universal/transcoder/basisu_transcoder_uastc.h @@ -0,0 +1,294 @@ +// basisu_transcoder_uastc.h +#pragma once +#include "basisu_transcoder_internal.h" + +namespace basist +{ + struct color_quad_u8 + { + uint8_t m_c[4]; + }; + + const uint32_t TOTAL_UASTC_MODES = 19; + const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8; + + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30; + const uint32_t TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 = 27; // BC6H only supports only 5-bit pattern indices, BC7 supports 4-bit or 6-bit + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11; + const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19; + + extern const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES]; + + struct astc_bc7_common_partition2_desc + { + 
uint8_t m_bc7; + uint16_t m_astc; + bool m_invert; + }; + + extern const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2]; + + struct bc73_astc2_common_partition_desc + { + uint8_t m_bc73; + uint16_t m_astc2; + uint8_t k; // 0-5 - how to modify the BC7 3-subset pattern to match the ASTC pattern (LSB=invert) + }; + + extern const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS]; + + struct astc_bc7_common_partition3_desc + { + uint8_t m_bc7; + uint16_t m_astc; + uint8_t m_astc_to_bc7_perm; // converts ASTC to BC7 partition using g_astc_bc7_partition_index_perm_tables[][] + }; + + extern const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3]; + + extern const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16]; + extern const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16]; + extern const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16]; + + extern const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3]; + extern const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3]; + extern const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3]; + + extern const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2]; + + extern const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3]; + extern const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3]; // inverse of g_astc_to_bc7_partition_index_perm_tables + + extern const uint8_t* s_uastc_to_bc1_weights[6]; + + uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k); + + inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w, bool srgb) + { + if (srgb) + { + l = (l << 8) | 0x80; + h = (h << 8) | 0x80; + } + else + { + l = (l << 8) | l; + h = (h << 8) | h; + } + + uint32_t k = (l * (64 - w) + h * w + 32) >> 6; 
+ + return k >> 8; + } + + struct astc_block_desc + { + int m_weight_range; // weight BISE range + + int m_subsets; // number of ASTC partitions + int m_partition_seed; // partition pattern seed + int m_cem; // color endpoint mode used by all subsets + + int m_ccs; // color component selector (dual plane only) + bool m_dual_plane; // true if dual plane + + // Weight and endpoint BISE values. + // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. order + }; + + const uint32_t BC7ENC_TOTAL_ASTC_RANGES = 21; + + // See tables 81, 93, 18.13.Endpoint Unquantization + const uint32_t TOTAL_ASTC_RANGES = 21; + extern const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3]; + + struct astc_quant_bin + { + uint8_t m_unquant; // unquantized value + uint8_t m_index; // sorted index + }; + + extern astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index] + + int astc_get_levels(int range); + bool astc_is_valid_endpoint_range(uint32_t range); + uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range); + uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range); + + const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern); + + // BC7 + const uint32_t BC7ENC_BLOCK_SIZE = 16; + + struct bc7_block + { + uint64_t m_qwords[2]; + }; + + struct bc7_optimization_results + { + uint32_t m_mode; + uint32_t m_partition; + uint8_t m_selectors[16]; + uint8_t m_alpha_selectors[16]; + color_quad_u8 m_low[3]; + color_quad_u8 m_high[3]; + uint32_t m_pbits[3][2]; + uint32_t m_index_selector; + uint32_t m_rotation; + }; + + extern const uint32_t g_bc7_weights1[2]; + extern const uint32_t g_bc7_weights2[4]; 
+ extern const uint32_t g_bc7_weights3[8]; + extern const uint32_t g_bc7_weights4[16]; + extern const uint32_t g_astc_weights4[16]; + extern const uint32_t g_astc_weights5[32]; + extern const uint32_t g_astc_weights_3levels[3]; + extern const uint8_t g_bc7_partition1[16]; + extern const uint8_t g_bc7_partition2[64 * 16]; + extern const uint8_t g_bc7_partition3[64 * 16]; + extern const uint8_t g_bc7_table_anchor_index_second_subset[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_1[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_2[64]; + extern const uint8_t g_bc7_num_subsets[8]; + extern const uint8_t g_bc7_partition_bits[8]; + extern const uint8_t g_bc7_color_index_bitcount[8]; + extern const uint8_t g_bc7_mode_has_p_bits[8]; + extern const uint8_t g_bc7_mode_has_shared_p_bits[8]; + extern const uint8_t g_bc7_color_precision_table[8]; + extern const int8_t g_bc7_alpha_precision_table[8]; + extern const uint8_t g_bc7_alpha_index_bitcount[8]; + + inline bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || (mode == 5); } + inline int get_bc7_color_index_size(int mode, int index_selection_bit) { return g_bc7_color_index_bitcount[mode] + index_selection_bit; } + inline int get_bc7_alpha_index_size(int mode, int index_selection_bit) { return g_bc7_alpha_index_bitcount[mode] - index_selection_bit; } + + struct endpoint_err + { + uint16_t m_error; uint8_t m_lo; uint8_t m_hi; + }; + + extern endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit] + const uint32_t BC7ENC_MODE_6_OPTIMAL_INDEX = 5; + + extern endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c] + const uint32_t BC7ENC_MODE_5_OPTIMAL_INDEX = 1; + + // Packs a BC7 block from a high-level description. Handles all BC7 modes. + void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults); + + // Packs an ASTC block + // Constraints: Always 4x4, all subset CEM's must be equal, only tested with LDR CEM's. 
+ bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t mode); + + void pack_astc_solid_block(void* pDst_block, const color32& color); + +#ifdef _DEBUG + int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); +#endif + + struct uastc_block + { + union + { + uint8_t m_bytes[16]; + uint32_t m_dwords[4]; + }; + }; + + struct unpacked_uastc_block + { + astc_block_desc m_astc; + + uint32_t m_mode; + uint32_t m_common_pattern; + + color32 m_solid_color; + + bool m_bc1_hint0; + bool m_bc1_hint1; + + bool m_etc1_flip; + bool m_etc1_diff; + uint32_t m_etc1_inten0; + uint32_t m_etc1_inten1; + + uint32_t m_etc1_bias; + + uint32_t m_etc2_hints; + + uint32_t m_etc1_selector; + uint32_t m_etc1_r, m_etc1_g, m_etc1_b; + }; + + color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); + + struct decoder_etc_block; + struct eac_block; + + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); + bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); + + bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb); + bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool undo_blue_contract, bool read_hints = true); + + bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst); + + bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst); + + void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst); + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, 
uint32_t channel); + + void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst); + + // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); + + enum + { + cEncodeBC1HighQuality = 1, + cEncodeBC1HigherQuality = 2, + cEncodeBC1UseSelectors = 4, + }; + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); + + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); + + void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst); + void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality); + + bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); + bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t 
num_blocks_x, uint32_t num_blocks_y, bool high_quality); + + // uastc_init() MUST be called before using this module. + void uastc_init(); + +} // namespace basist diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/basis_universal.cs b/Source/Tools/Flax.Build/Deps/Dependencies/basis_universal.cs new file mode 100644 index 000000000..d31040755 --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/basis_universal.cs @@ -0,0 +1,143 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using System.Collections.Generic; +using System.IO; +using Flax.Build; + +namespace Flax.Deps.Dependencies +{ + /// + /// GPU compressed texture interchange system from Binomial LLC that supports two intermediate file formats. + /// + /// + class basis_universal : Dependency + { + /// + public override TargetPlatform[] Platforms + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetPlatform.Windows, + TargetPlatform.Web, + }; + default: + return new TargetPlatform[0]; + } + } + } + + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x86, + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: + return new TargetArchitecture[0]; + } + } + } + + /// + public override void Build(BuildOptions options) + { + var root = options.IntermediateFolder; + var buildDir = Path.Combine(root, "build"); + var configuration = "Release"; + var defines = new Dictionary() + { + // Disable unused formats and features + { "BASISD_SUPPORT_ATC", "0" }, + { "BASISD_SUPPORT_FXT1", "0" }, + { "BASISD_SUPPORT_PVRTC1", "0" }, + { "BASISD_SUPPORT_PVRTC2", "0" }, + { "BASISD_SUPPORT_ETC2_EAC_A8", "0" }, + { "BASISD_SUPPORT_ETC2_EAC_RG11", "0" }, + { "BASISD_SUPPORT_BC7", "0" }, + }; + + // Get the source + var commit = "2c4f52f705f697660cc7a9d481d9d496f85b9959"; // 'final snapshot of v2.0 before v2.1' + CloneGitRepo(root, 
"https://github.com/BinomialLLC/basis_universal.git", commit); + + // Disable KTX2 support + var headerPath = Path.Combine(root, "transcoder/basisu_transcoder.h"); + //Utilities.ReplaceInFile(headerPath, "#define BASISD_SUPPORT_KTX2 1", "#define BASISD_SUPPORT_KTX2 0"); + Utilities.ReplaceInFile(headerPath, "#define BASISD_SUPPORT_KTX2_ZSTD 1", "#define BASISD_SUPPORT_KTX2_ZSTD 0"); + + // Fix build for Web with Emscripten + var cmakeListsPath = Path.Combine(root, "CMakeLists.txt"); + Utilities.ReplaceInFile(cmakeListsPath, "if (CMAKE_SYSTEM_NAME STREQUAL \"WASI\")", "if (CMAKE_SYSTEM_NAME STREQUAL \"WASI\" OR BASISU_BUILD_WASM)"); + Utilities.ReplaceInFile(cmakeListsPath, "set(BASISU_STATIC OFF CACHE BOOL \"\" FORCE)", ""); + + { + foreach (var architecture in options.Architectures) + { + if (!BuildStarted(platform, architecture)) + continue; + + var cmakeArgs = $"-DCMAKE_BUILD_TYPE={configuration} -DBASISU_STATIC=ON -DBASISU_EXAMPLES=OFF -DBASISU_ZSTD=OFF -DBASISU_OPENCL=OFF"; + cmakeArgs += " -DCMAKE_CXX_FLAGS=\""; + if (platform == TargetPlatform.Windows) + { + foreach (var define in defines) + cmakeArgs += $"/D{define.Key}={define.Value} "; + } + else + { + foreach (var define in defines) + cmakeArgs += $"-D{define.Key}={define.Value} "; + } + cmakeArgs += "\""; + + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + SetupDirectory(buildDir, true); + switch (platform) + { + case TargetPlatform.Windows: + { + cmakeArgs = ".. -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreadedDLL " + cmakeArgs; + RunCmake(buildDir, platform, architecture, cmakeArgs); + BuildCmake(buildDir, configuration, options:Utilities.RunOptions.ConsoleLogOutput); + CopyLib(platform, Path.Combine(buildDir, configuration), depsFolder, "basisu_encoder"); + break; + } + case TargetPlatform.Web: + { + cmakeArgs = ".. 
-DBASISU_BUILD_WASM=ON -DBASISU_WASM_THREADING=OFF " + cmakeArgs; + RunCmake(buildDir, platform, architecture, cmakeArgs); + BuildCmake(buildDir, configuration, options:Utilities.RunOptions.ConsoleLogOutput); + CopyLib(platform, buildDir, depsFolder, "basisu_encoder"); + break; + } + } + } + } + + // Copy headers and license + SetupDirectory(Path.Combine(options.ThirdPartyFolder, "basis_universal", "encoder"), true); + SetupDirectory(Path.Combine(options.ThirdPartyFolder, "basis_universal", "transcoder"), true); + Utilities.DirectoryCopy(Path.Combine(root, "encoder"), Path.Combine(options.ThirdPartyFolder, "basis_universal", "encoder"), false, true, "*.h"); + Utilities.DirectoryCopy(Path.Combine(root, "transcoder"), Path.Combine(options.ThirdPartyFolder, "basis_universal", "transcoder"), false, true, "*.h"); + Utilities.FileCopy(Path.Combine(root, "LICENSE"), Path.Combine(options.ThirdPartyFolder, "basis_universal", "LICENSE")); + + // Fix C++17 usage + Utilities.ReplaceInFile(Path.Combine(options.ThirdPartyFolder, "basis_universal/transcoder/basisu.h"), "if constexpr", "if IF_CONSTEXPR"); + Utilities.ReplaceInFile(Path.Combine(options.ThirdPartyFolder, "basis_universal/transcoder/basisu_containers.h"), "if constexpr", "if IF_CONSTEXPR"); + Utilities.ReplaceInFile(Path.Combine(options.ThirdPartyFolder, "basis_universal/encoder/basisu_math.h"), "static_assert(", "// static_assert("); + } + } +} diff --git a/Source/Tools/Flax.Build/Deps/Dependency.cs b/Source/Tools/Flax.Build/Deps/Dependency.cs index 842a57b20..dc799702d 100644 --- a/Source/Tools/Flax.Build/Deps/Dependency.cs +++ b/Source/Tools/Flax.Build/Deps/Dependency.cs @@ -185,9 +185,12 @@ namespace Flax.Deps /// Logs build process start. /// /// Target platform. - protected void BuildStarted(TargetPlatform platform, TargetArchitecture architecture) + /// Target architecture. + /// True if build for the target platform, otherwise false if that architecture should be skipped. 
+ protected bool BuildStarted(TargetPlatform platform, TargetArchitecture architecture) { Log.Info($"Building {GetType().Name} for {platform}{(architecture != TargetArchitecture.AnyCPU ? $" ({architecture})" : "")}"); + return Platform.IsPlatformSupported(platform, architecture); } /// @@ -250,6 +253,42 @@ namespace Flax.Deps Utilities.DirectoryCopy(src, dst); } + /// + /// Copies the library to the deps files (handles platform specific switches). Matches . + /// + /// The build platform. + /// The path fo the source folder with library (build output). + /// The path fo the source folder with library. + /// The library name. + public static void CopyLib(TargetPlatform platform, string srcFolder, string dstFolder, string name) + { + string filename, pdbPath; + switch (platform) + { + case TargetPlatform.Windows: + case TargetPlatform.XboxOne: + case TargetPlatform.UWP: + case TargetPlatform.XboxScarlett: + filename = string.Format("{0}.lib", name); + pdbPath = Path.Combine(srcFolder, string.Format("{0}.pdb", name)); + if (File.Exists(pdbPath)) + Utilities.FileCopy(pdbPath, Path.Combine(dstFolder, string.Format("{0}.pdb", name)), true); + break; + case TargetPlatform.Linux: + case TargetPlatform.PS4: + case TargetPlatform.PS5: + case TargetPlatform.Android: + case TargetPlatform.Switch: + case TargetPlatform.Mac: + case TargetPlatform.iOS: + case TargetPlatform.Web: + filename = string.Format("lib{0}.a", name); + break; + default: throw new InvalidPlatformException(platform); + } + Utilities.FileCopy(Path.Combine(srcFolder, filename), Path.Combine(dstFolder, filename), true); + } + /// /// Checks if git repository exists at given path. /// @@ -429,9 +468,10 @@ namespace Flax.Deps /// The path. /// The configuration preset. /// Custom environment variables to pass to the child process. - public static void BuildCmake(string path, string config = "Release", Dictionary envVars = null) + /// Defines the options how to run. See RunOptions. 
+ public static void BuildCmake(string path, string config = "Release", Dictionary envVars = null, Utilities.RunOptions options = Utilities.RunOptions.DefaultTool) { - Utilities.Run("cmake", $"--build . --config {config}", null, path, Utilities.RunOptions.DefaultTool, envVars); + Utilities.Run("cmake", $"--build . --config {config}", null, path, options, envVars); } /// @@ -514,6 +554,13 @@ namespace Flax.Deps cmdLine = string.Format("CMakeLists.txt -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_DEPLOYMENT_TARGET=\"{0}\" -DCMAKE_OSX_ARCHITECTURES={1}", Configuration.iOSMinVer, arch); break; } + case TargetPlatform.Web: + { + cmdLine = string.Format("CMakeLists.txt -DCMAKE_TOOLCHAIN_FILE=\"{0}/emscripten/cmake/Modules/Platform/Emscripten.cmake\"", EmscriptenSdk.Instance.EmscriptenPath); + if (BuildPlatform == TargetPlatform.Windows) + cmdLine += " -G \"Ninja\""; + break; + } default: throw new InvalidPlatformException(platform); }