diff --git a/Source/Engine/Tools/TextureTool/TextureTool.cpp b/Source/Engine/Tools/TextureTool/TextureTool.cpp index 56aa9295f..8dc94deb3 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.cpp @@ -460,6 +460,21 @@ TextureTool::PixelFormatSampler PixelFormatSamplers[] = *(Color32*)ptr = Color32(srgb); }, }, + { + PixelFormat::R8G8_UNorm, + sizeof(uint16), + [](const void* ptr) + { + const uint8* rg = (const uint8*)ptr; + return Color((float)rg[0] / MAX_uint8, (float)rg[1] / MAX_uint8, 0, 1); + }, + [](const void* ptr, const Color& color) + { + uint8* rg = (uint8*)ptr; + rg[0] = (uint8)(color.R * MAX_uint8); + rg[1] = (uint8)(color.G * MAX_uint8); + }, + }, { PixelFormat::R16G16_Float, sizeof(Half2), diff --git a/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp b/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp index 64de58572..86bdd49e1 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.stb.cpp @@ -63,7 +63,21 @@ static TextureData const* stbDecompress(const TextureData& textureData, TextureD { if (!PixelFormatExtensions::IsCompressed(textureData.Format)) return &textureData; - decompressed.Format = PixelFormatExtensions::IsSRGB(textureData.Format) ? PixelFormat::R8G8B8A8_UNorm_sRGB : PixelFormat::R8G8B8A8_UNorm; + const bool srgb = PixelFormatExtensions::IsSRGB(textureData.Format); + switch (textureData.Format) + { + case PixelFormat::BC4_UNorm: + case PixelFormat::BC4_SNorm: + decompressed.Format = PixelFormat::R8_UNorm; + break; + case PixelFormat::BC5_UNorm: + case PixelFormat::BC5_SNorm: + decompressed.Format = PixelFormat::R8G8_UNorm; + break; + default: + decompressed.Format = srgb ? 
PixelFormat::R8G8B8A8_UNorm_sRGB : PixelFormat::R8G8B8A8_UNorm; + break; + } decompressed.Width = textureData.Width; decompressed.Height = textureData.Height; decompressed.Depth = textureData.Depth; @@ -71,7 +85,7 @@ static TextureData const* stbDecompress(const TextureData& textureData, TextureD decompressed.Items[0].Mips.Resize(1); TextureMipData* decompressedData = decompressed.GetData(0, 0); - decompressedData->RowPitch = textureData.Width * sizeof(Color32); + decompressedData->RowPitch = textureData.Width * PixelFormatExtensions::SizeInBytes(decompressed.Format); decompressedData->Lines = textureData.Height; decompressedData->DepthPitch = decompressedData->RowPitch * decompressedData->Lines; decompressedData->Data.Allocate(decompressedData->DepthPitch); @@ -83,72 +97,74 @@ static TextureData const* stbDecompress(const TextureData& textureData, TextureD const TextureMipData* blocksData = textureData.GetData(0, 0); const byte* blocksBytes = blocksData->Data.Get(); + typedef bool (*detexDecompressBlockFuncType)(const uint8_t* bitstring, uint32_t mode_mask, uint32_t flags, uint8_t* pixel_buffer); + detexDecompressBlockFuncType detexDecompressBlockFunc; + int32 pixelSize, blockSize; switch (textureData.Format) { case PixelFormat::BC1_UNorm: case PixelFormat::BC1_UNorm_sRGB: - { - for (int32 yBlock = 0; yBlock < blocksHeight; yBlock++) - { - for (int32 xBlock = 0; xBlock < blocksWidth; xBlock++) - { - const byte* block = blocksBytes + yBlock * blocksData->RowPitch + xBlock * 8; - detexDecompressBlockBC1(block, 0, 0, (byte*)&colors); - for (int32 y = 0; y < 4; y++) - { - for (int32 x = 0; x < 4; x++) - { - *((Color32*)decompressedBytes + (yBlock * 4 + y) * textureData.Width + (xBlock * 4 + x)) = colors[y * 4 + x]; - } - } - } - } + detexDecompressBlockFunc = detexDecompressBlockBC1; + pixelSize = 4; + blockSize = 8; break; - } case PixelFormat::BC2_UNorm: case PixelFormat::BC2_UNorm_sRGB: - { - for (int32 yBlock = 0; yBlock < blocksHeight; yBlock++) - { - for (int32 
xBlock = 0; xBlock < blocksWidth; xBlock++) - { - const byte* block = blocksBytes + yBlock * blocksData->RowPitch + xBlock * 16; - detexDecompressBlockBC2(block, 0, 0, (byte*)&colors); - for (int32 y = 0; y < 4; y++) - { - for (int32 x = 0; x < 4; x++) - { - *((Color32*)decompressedBytes + (yBlock * 4 + y) * textureData.Width + (xBlock * 4 + x)) = colors[y * 4 + x]; - } - } - } - } + detexDecompressBlockFunc = detexDecompressBlockBC2; + pixelSize = 4; + blockSize = 16; break; - } case PixelFormat::BC3_UNorm: case PixelFormat::BC3_UNorm_sRGB: - { - for (int32 yBlock = 0; yBlock < blocksHeight; yBlock++) - { - for (int32 xBlock = 0; xBlock < blocksWidth; xBlock++) - { - const byte* block = blocksBytes + yBlock * blocksData->RowPitch + xBlock * 16; - detexDecompressBlockBC3(block, 0, 0, (byte*)&colors); - for (int32 y = 0; y < 4; y++) - { - for (int32 x = 0; x < 4; x++) - { - *((Color32*)decompressedBytes + (yBlock * 4 + y) * textureData.Width + (xBlock * 4 + x)) = colors[y * 4 + x]; - } - } - } - } + detexDecompressBlockFunc = detexDecompressBlockBC3; + pixelSize = 4; + blockSize = 16; + break; + case PixelFormat::BC4_UNorm: + detexDecompressBlockFunc = detexDecompressBlockRGTC1; + pixelSize = 1; + blockSize = 8; + break; + case PixelFormat::BC5_UNorm: + detexDecompressBlockFunc = detexDecompressBlockRGTC2; + pixelSize = 2; + blockSize = 16; + break; + case PixelFormat::BC7_UNorm: + case PixelFormat::BC7_UNorm_sRGB: + detexDecompressBlockFunc = detexDecompressBlockBPTC; + pixelSize = 4; + blockSize = 16; break; - } default: - LOG(Warning, "Texture data format {0} is not supported by stb library.", (int32)textureData.Format); + LOG(Warning, "Texture data format {0} is not supported by detex library.", (int32)textureData.Format); return nullptr; } + + uint8 blockBuffer[DETEX_MAX_BLOCK_SIZE]; + for (int32 y = 0; y < blocksHeight; y++) + { + int32 rows; + if (y * 4 + 3 >= textureData.Height) + rows = textureData.Height - y * 4; + else + rows = 4; + for (int32 x = 0; x < 
blocksWidth; x++) + { + const byte* block = blocksBytes + y * blocksData->RowPitch + x * blockSize; + if (!detexDecompressBlockFunc(block, DETEX_MODE_MASK_ALL, 0, blockBuffer)) + memset(blockBuffer, 0, DETEX_MAX_BLOCK_SIZE); + uint8* pixels = decompressedBytes + y * 4 * textureData.Width * pixelSize + x * 4 * pixelSize; + int32 columns; + if (x * 4 + 3 >= textureData.Width) + columns = textureData.Width - x * 4; + else + columns = 4; + for (int32 row = 0; row < rows; row++) + memcpy(pixels + row * textureData.Width * pixelSize, blockBuffer + row * 4 * pixelSize, columns * pixelSize); + } + } + return &decompressed; } diff --git a/Source/ThirdParty/detex/bits.cpp b/Source/ThirdParty/detex/bits.cpp new file mode 100644 index 000000000..f223f2054 --- /dev/null +++ b/Source/ThirdParty/detex/bits.cpp @@ -0,0 +1,45 @@ +/* + +Copyright (c) 2015 Harm Hanemaaijer + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +*/ + +#include "detex.h" +#include "bits.h" + +uint32_t detexBlock128ExtractBits(detexBlock128 *block, int nu_bits) { + uint32_t value = 0; + for (int i = 0; i < nu_bits; i++) { + if (block->index < 64) { + int shift = block->index - i; + if (shift < 0) + value |= (block->data0 & ((uint64_t)1 << block->index)) << (- shift); + else + value |= (block->data0 & ((uint64_t)1 << block->index)) >> shift; + } + else { + int shift = ((block->index - 64) - i); + if (shift < 0) + value |= (block->data1 & ((uint64_t)1 << (block->index - 64))) << (- shift); + else + value |= (block->data1 & ((uint64_t)1 << (block->index - 64))) >> shift; + } + block->index++; + } +// if (block->index > 128) +// printf("Block overflow (%d)\n", block->index); + return value; +} + diff --git a/Source/ThirdParty/detex/bits.h b/Source/ThirdParty/detex/bits.h new file mode 100644 index 000000000..39350e145 --- /dev/null +++ b/Source/ThirdParty/detex/bits.h @@ -0,0 +1,62 @@ +/* + +Copyright (c) 2015 Harm Hanemaaijer + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +*/ + +/* Data structure used to extract bits from 128-bit bitstring. 
*/ + +typedef struct { + uint64_t data0; + uint64_t data1; + int index; +} detexBlock128; + +uint32_t detexBlock128ExtractBits(detexBlock128 *block, int nu_bits); + +/* Return bitfield from bit0 to bit1 from 64-bit bitstring. */ +static DETEX_INLINE_ONLY uint32_t detexGetBits64(uint64_t data, int bit0, int bit1) { + return (data & (((uint64_t)1 << (bit1 + 1)) - 1)) >> bit0; +} + +/* Return reversed bitfield (bit1 to bit0) from 64-bit bitstring. */ +static DETEX_INLINE_ONLY uint32_t detexGetBits64Reversed(uint64_t data, int bit0, int bit1) { + // Assumes bit0 > bit1. + // Reverse the bits. + uint32_t val = 0; + for (int i = 0; i <= bit0 - bit1; i++) { + int shift_right = bit0 - 2 * i; + if (shift_right >= 0) + val |= (data & ((uint64_t)1 << (bit0 - i))) >> shift_right; + else + val |= (data & ((uint64_t)1 << (bit0 - i))) << (- shift_right); + } + return val; +} + +/* Clear bit0 to bit1 of 64-bit bitstring. */ +static DETEX_INLINE_ONLY uint64_t detexClearBits64(uint64_t data, int bit0, int bit1) { + uint64_t mask = ~(((uint64_t)1 << (bit1 + 1)) - 1); + mask |= ((uint64_t)1 << bit0) - 1; + return data & mask; +} + +/* Set bit0 to bit1 of 64-bit bitstring. */ +static DETEX_INLINE_ONLY uint64_t detexSetBits64(uint64_t data, int bit0, int bit1, uint64_t val) { + uint64_t d = detexClearBits64(data, bit0, bit1); + d |= val << bit0; + return d; +} + diff --git a/Source/ThirdParty/detex/bptc-tables.cpp b/Source/ThirdParty/detex/bptc-tables.cpp new file mode 100644 index 000000000..ad29a1e89 --- /dev/null +++ b/Source/ThirdParty/detex/bptc-tables.cpp @@ -0,0 +1,203 @@ +/* + +Copyright (c) 2015 Harm Hanemaaijer + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. 
+ +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +*/ + +#include "detex.h" +#include "bits.h" +#include "bptc-tables.h" + +const uint8_t detex_bptc_table_P2[64 * 16] = { + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, + 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, + 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, + 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, + 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, + 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, + 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, + 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, + 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, + 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, + 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, + 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, + 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, + 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, + 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, + 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, + 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, + 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, + 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, + 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, + 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, + 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, + 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, + 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, + 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, + 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, + 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, + 
0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, + 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, + 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, + 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, + 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, + 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, + 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, + 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, + 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, + 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, + 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, + 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, + 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, + 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, + 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, + 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, + 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, + 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, + 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, + 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, + 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, + 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 +}; + +const uint8_t detex_bptc_table_P3[64 * 16] = { + 0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, + 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, + 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, + 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, + 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, + 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, + 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, + 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, + 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, + 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, + 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, + 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, + 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, + 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, + 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, + 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, + 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, + 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, + 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, + 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, + 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, + 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, + 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, + 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, + 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, + 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, + 
0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, + 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, + 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, + 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, + 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, + 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, + 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, + 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, + 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, + 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, + 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, + 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, + 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, + 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, + 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, + 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, + 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, + 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, + 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, + 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, + 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, + 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, + 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, + 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, + 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, + 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, + 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, + 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, + 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, + 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, +}; + +const uint8_t detex_bptc_table_anchor_index_second_subset[64] = { + 15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15, + 15, 2, 8, 2, 2, 8, 8,15, + 2, 8, 2, 2, 8, 8, 2, 2, + 15,15, 6, 8, 2, 8,15,15, + 2, 8, 2, 2, 2,15,15, 6, + 6, 2, 6, 8,15,15, 2, 2, + 15,15,15,15,15, 2, 2,15 +}; + +const uint8_t detex_bptc_table_anchor_index_second_subset_of_three[64] = { + 3, 3,15,15, 8, 3,15,15, + 8, 8, 6, 6, 6, 5, 3, 3, + 3, 3, 8,15, 3, 3, 6,10, + 5, 8, 8, 6, 8, 5,15,15, + 8,15, 3, 5, 6,10, 8,15, + 15, 3,15, 5,15,15,15,15, + 3,15, 5, 5, 5, 8, 5,10, + 5,10, 8,13,15,12, 3, 3 +}; + +const uint8_t detex_bptc_table_anchor_index_third_subset[64] = { + 15, 8, 8, 3,15,15, 3, 8, + 15,15,15,15,15,15,15, 8, + 15, 8,15, 3,15, 8,15, 8, + 3,15, 6,10,15,15,10, 8, + 15, 3,15,10,10, 8, 9,10, + 6,15, 8,15, 3, 6, 6, 8, + 
15, 3,15,15,15,15,15,15, + 15,15,15,15, 3,15,15, 8 +}; + +const uint16_t detex_bptc_table_aWeight2[4] = { + 0, 21, 43, 64 +}; + +const uint16_t detex_bptc_table_aWeight3[8] = { + 0, 9, 18, 27, 37, 46, 55, 64 +}; + +const uint16_t detex_bptc_table_aWeight4[16] = { + 0, 4, 9, 13, 17, 21, 26, 30, + 34, 38, 43, 47, 51, 55, 60, 64 +}; + + diff --git a/Source/ThirdParty/detex/bptc-tables.h b/Source/ThirdParty/detex/bptc-tables.h new file mode 100644 index 000000000..ae11b2af0 --- /dev/null +++ b/Source/ThirdParty/detex/bptc-tables.h @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2015 Harm Hanemaaijer + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+
+*/
+
+extern const uint8_t detex_bptc_table_P2[64 * 16]; // Subset number (0..1) per pixel, indexed [partition_set_id * 16 + pixel].
+extern const uint8_t detex_bptc_table_P3[64 * 16]; // Subset number (0..2) per pixel, indexed [partition_set_id * 16 + pixel].
+
+extern const uint8_t detex_bptc_table_anchor_index_second_subset[64]; // Anchor pixel of subset 1 for two-subset partitions, indexed by partition_set_id.
+extern const uint8_t detex_bptc_table_anchor_index_second_subset_of_three[64]; // Anchor pixel of subset 1 for three-subset partitions.
+extern const uint8_t detex_bptc_table_anchor_index_third_subset[64]; // Anchor pixel of subset 2 for three-subset partitions.
+
+extern const uint16_t detex_bptc_table_aWeight2[4]; // Interpolation weights (0..64) for 2-bit indices.
+extern const uint16_t detex_bptc_table_aWeight3[8]; // Interpolation weights (0..64) for 3-bit indices.
+extern const uint16_t detex_bptc_table_aWeight4[16]; // Interpolation weights (0..64) for 4-bit indices.
+
diff --git a/Source/ThirdParty/detex/decompress-bptc.cpp b/Source/ThirdParty/detex/decompress-bptc.cpp
new file mode 100644
index 000000000..618220561
--- /dev/null
+++ b/Source/ThirdParty/detex/decompress-bptc.cpp
@@ -0,0 +1,623 @@
+/*
+
+Copyright (c) 2015 Harm Hanemaaijer
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+#include "detex.h"
+#include "bits.h"
+#include "bptc-tables.h"
+
+// BPTC mode layout (all arrays below are indexed by mode 0..7):
+//
+// Number of subsets = { 3, 2, 3, 2, 1, 1, 1, 2 };
+// Partition bits = { 4, 6, 6, 6, 0, 0, 0, 6 };
+// Rotation bits = { 0, 0, 0, 0, 2, 2, 0, 0 };
+// Mode 4 has one index selection bit.
+//
+//         #subsets  color  alpha  before color    index after color     index after       After      Index
+//                                                                        alpha             pbits      bits (*)
+// Mode 0  3         4      0      1 + 4 = 5       5 + 6 * 3 * 4 = 77    77                + 6 = 83   + 48 - 3 = 128
+// Mode 1  2         6      0      2 + 6 = 8       8 + 4 * 3 * 6 = 80    80                + 2 = 82   + 48 - 2 = 128
+// Mode 2  3         5      0      3 + 6 = 9       9 + 6 * 3 * 5 = 99    99                99         + 32 - 3 = 128
+// Mode 3  2         7      0      4 + 6 = 10      10 + 4 * 3 * 7 = 94   94                + 4 = 98   + 32 - 2 = 128
+// Mode 4  1         5      6      5 + 2 + 1 = 8   8 + 2 * 3 * 5 = 38    38 + 2 * 6 = 50   50         + 80 - 2 = 128
+// Mode 5  1         7      8      6 + 2 = 8       8 + 2 * 3 * 7 = 50    50 + 2 * 8 = 66   66         + 64 - 2 = 128
+// Mode 6  1         7      7      7               7 + 2 * 3 * 7 = 49    49 + 2 * 7 = 63   + 2 = 65   + 64 - 1 = 128
+// Mode 7  2         5      5      8 + 6 = 14      14 + 4 * 3 * 5 = 74   74 + 4 * 5 = 94   + 4 = 98   + 32 - 2 = 128
+//
+// (*) For formats without alpha, the number of index bits is reduced by #subsets anchor bits.
+// For formats with alpha, the number of index bits is reduced by 2 * #subsets by the anchor bits.
+
+
+static const uint8_t color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 }; // Stored bits per colour component, per mode (P-bit excluded).
+
+// Note: precision includes P-bits!
+static const uint8_t color_precision_plus_pbit_table[8] = { 5, 7, 5, 8, 5, 7, 8, 6 };
+
+static DETEX_INLINE_ONLY uint8_t GetColorComponentPrecision(int mode) { // mode must be 0..7; no range check.
+    return color_precision_table[mode];
+}
+
+static DETEX_INLINE_ONLY uint8_t GetColorComponentPrecisionPlusPbit(int mode) { // mode must be 0..7; no range check.
+    return color_precision_plus_pbit_table[mode];
+}
+
+static const int8_t alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 }; // Stored bits per alpha component; 0 for the opaque modes 0..3.
+
+// Note: precision includes P-bits!
+static const uint8_t alpha_precision_plus_pbit_table[8] = { 0, 0, 0, 0, 6, 8, 8, 6 };
+
+static DETEX_INLINE_ONLY uint8_t GetAlphaComponentPrecision(int mode) { // mode must be 0..7; no range check.
+    return alpha_precision_table[mode];
+}
+
+static DETEX_INLINE_ONLY uint8_t GetAlphaComponentPrecisionPlusPbit(int mode) { // mode must be 0..7; no range check.
+    return alpha_precision_plus_pbit_table[mode];
+}
+
+static const int8_t components_in_qword0_table[8] = { 2, -1, 1, 1, 3, 3, 3, 2 }; // Colour components lying wholly in data0; -1 for mode 1, which has its own decoder.
+
+/* Extract endpoint colors.
*/ +static void ExtractEndpoints(int mode, int nu_subsets, detexBlock128 * DETEX_RESTRICT block, +uint8_t * DETEX_RESTRICT endpoint_array) { + // Optimized version avoiding the use of block_extract_bits(). + int components_in_qword0 = components_in_qword0_table[mode]; + uint64_t data = block->data0 >> block->index; + uint8_t precision = GetColorComponentPrecision(mode); + uint8_t mask = (1 << precision) - 1; + int total_bits_per_component = nu_subsets * 2 * precision; + for (int i = 0; i < components_in_qword0; i++) // For each color component. + for (int j = 0; j < nu_subsets; j++) // For each subset. + for (int k = 0; k < 2; k++) { // For each endpoint. + endpoint_array[j * 8 + k * 4 + i] = data & mask; + data >>= precision; + } + block->index += components_in_qword0 * total_bits_per_component; + if (components_in_qword0 < 3) { + // Handle the color component that crosses the boundary between data0 and data1 + data = block->data0 >> block->index; + data |= block->data1 << (64 - block->index); + int i = components_in_qword0; + for (int j = 0; j < nu_subsets; j++) // For each subset. + for (int k = 0; k < 2; k++) { // For each endpoint. + endpoint_array[j * 8 + k * 4 + i] = data & mask; + data >>= precision; + } + block->index += total_bits_per_component; + } + if (components_in_qword0 < 2) { + // Handle the color component that is wholly in data1. + data = block->data1 >> (block->index - 64); + int i = 2; + for (int j = 0; j < nu_subsets; j++) // For each subset. + for (int k = 0; k < 2; k++) { // For each endpoint. + endpoint_array[j * 8 + k * 4 + i] = data & mask; + data >>= precision; + } + block->index += total_bits_per_component; + } + // Alpha component. + if (GetAlphaComponentPrecision(mode) > 0) { + // For mode 7, the alpha data is wholly in data1. + // For modes 4 and 6, the alpha data is wholly in data0. + // For mode 5, the alpha data is in data0 and data1. 
+ if (mode == 7) + data = block->data1 >> (block->index - 64); + else if (mode == 5) + data = (block->data0 >> block->index) | ((block->data1 & 0x3) << 14); + else + data = block->data0 >> block->index; + uint8_t alpha_precision = GetAlphaComponentPrecision(mode); + uint8_t mask = (1 << alpha_precision) - 1; + for (int j = 0; j < nu_subsets; j++) + for (int k = 0; k < 2; k++) { // For each endpoint. + endpoint_array[j * 8 + k * 4 + 3] = data & mask; + data >>= alpha_precision; + } + block->index += nu_subsets * 2 * alpha_precision; + } +} + +static const uint8_t mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 }; + +static void FullyDecodeEndpoints(uint8_t * DETEX_RESTRICT endpoint_array, int nu_subsets, +int mode, detexBlock128 * DETEX_RESTRICT block) { + if (mode_has_p_bits[mode]) { + // Mode 1 (shared P-bits) handled elsewhere. + // Extract end-point P-bits. Take advantage of the fact that they don't cross the + // 64-bit word boundary in any mode. + uint32_t bits; + if (block->index < 64) + bits = block->data0 >> block->index; + else + bits = block->data1 >> (block->index - 64); + for (int i = 0; i < nu_subsets * 2; i++) { + endpoint_array[i * 4 + 0] <<= 1; + endpoint_array[i * 4 + 1] <<= 1; + endpoint_array[i * 4 + 2] <<= 1; + endpoint_array[i * 4 + 3] <<= 1; + endpoint_array[i * 4 + 0] |= (bits & 1); + endpoint_array[i * 4 + 1] |= (bits & 1); + endpoint_array[i * 4 + 2] |= (bits & 1); + endpoint_array[i * 4 + 3] |= (bits & 1); + bits >>= 1; + } + block->index += nu_subsets * 2; + } + int color_prec = GetColorComponentPrecisionPlusPbit(mode); + int alpha_prec = GetAlphaComponentPrecisionPlusPbit(mode); + for (int i = 0; i < nu_subsets * 2; i++) { + // Color_component_precision & alpha_component_precision includes pbit + // left shift endpoint components so that their MSB lies in bit 7 + endpoint_array[i * 4 + 0] <<= (8 - color_prec); + endpoint_array[i * 4 + 1] <<= (8 - color_prec); + endpoint_array[i * 4 + 2] <<= (8 - color_prec); + endpoint_array[i * 4 + 3] <<= 
(8 - alpha_prec); + + // Replicate each component's MSB into the LSBs revealed by the left-shift operation above. + endpoint_array[i * 4 + 0] |= (endpoint_array[i * 4 + 0] >> color_prec); + endpoint_array[i * 4 + 1] |= (endpoint_array[i * 4 + 1] >> color_prec); + endpoint_array[i * 4 + 2] |= (endpoint_array[i * 4 + 2] >> color_prec); + endpoint_array[i * 4 + 3] |= (endpoint_array[i * 4 + 3] >> alpha_prec); + } + if (mode <= 3) { + for (int i = 0; i < nu_subsets * 2; i++) + endpoint_array[i * 4 + 3] = 0xFF; + } +} + +static uint8_t Interpolate(uint8_t e0, uint8_t e1, uint8_t index, uint8_t indexprecision) { + if (indexprecision == 2) + return (uint8_t) (((64 - detex_bptc_table_aWeight2[index]) * (uint16_t)e0 + + detex_bptc_table_aWeight2[index] * (uint16_t)e1 + 32) >> 6); + else + if (indexprecision == 3) + return (uint8_t) (((64 - detex_bptc_table_aWeight3[index]) * (uint16_t)e0 + + detex_bptc_table_aWeight3[index] * (uint16_t)e1 + 32) >> 6); + else // indexprecision == 4 + return (uint8_t) (((64 - detex_bptc_table_aWeight4[index]) * (uint16_t)e0 + + detex_bptc_table_aWeight4[index] * (uint16_t)e1 + 32) >> 6); +} + +static const uint8_t bptc_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 }; + +static DETEX_INLINE_ONLY int GetColorIndexBitcount(int mode, int index_selection_bit) { + // If the index selection bit is set for mode 4, return 3, otherwise 2. + return bptc_color_index_bitcount[mode] + index_selection_bit; +} + +static uint8_t bptc_alpha_index_bitcount[8] = { 3, 3, 2, 2, 3, 2, 4, 2}; + +static DETEX_INLINE_ONLY int GetAlphaIndexBitcount(int mode, int index_selection_bit) { + // If the index selection bit is set for mode 4, return 2, otherwise 3. 
+ return bptc_alpha_index_bitcount[mode] - index_selection_bit; +} + +static const uint8_t bptc_NS[8] = { 3, 2, 3, 2, 1, 1, 1, 2 }; + +static DETEX_INLINE_ONLY int GetNumberOfSubsets(int mode) { + return bptc_NS[mode]; +} + +static const uint8_t PB[8] = { 4, 6, 6, 6, 0, 0, 0, 6 }; + +static DETEX_INLINE_ONLY int GetNumberOfPartitionBits(int mode) { + return PB[mode]; +} + +static const uint8_t RB[8] = { 0, 0, 0, 0, 2, 2, 0, 0 }; + +static DETEX_INLINE_ONLY int GetNumberOfRotationBits(int mode) { + return RB[mode]; +} + +// Functions to extract parameters. */ + +static int ExtractMode(detexBlock128 *block) { + for (int i = 0; i < 8; i++) + if (block->data0 & ((uint64_t)1 << i)) { + block->index = i + 1; + return i; + } + // Illegal. + return - 1; +} + +static DETEX_INLINE_ONLY int ExtractPartitionSetID(detexBlock128 *block, int mode) { + return detexBlock128ExtractBits(block, GetNumberOfPartitionBits(mode)); +} + +static DETEX_INLINE_ONLY int GetPartitionIndex(int nu_subsets, int partition_set_id, int i) { + if (nu_subsets == 1) + return 0; + if (nu_subsets == 2) + return detex_bptc_table_P2[partition_set_id * 16 + i]; + return detex_bptc_table_P3[partition_set_id * 16 + i]; +} + +static DETEX_INLINE_ONLY int ExtractRotationBits(detexBlock128 *block, int mode) { + return detexBlock128ExtractBits(block, GetNumberOfRotationBits(mode)); +} + +static DETEX_INLINE_ONLY int GetAnchorIndex(int partition_set_id, int partition, int nu_subsets) { + if (partition == 0) + return 0; + if (nu_subsets == 2) + return detex_bptc_table_anchor_index_second_subset[partition_set_id]; + if (partition == 1) + return detex_bptc_table_anchor_index_second_subset_of_three[partition_set_id]; + return detex_bptc_table_anchor_index_third_subset[partition_set_id]; +} + +static const uint8_t IB[8] = { 3, 3, 2, 2, 2, 2, 4, 2 }; +static const uint8_t IB2[8] = { 0, 0, 0, 0, 3, 2, 0, 0 }; +static const uint8_t mode_has_partition_bits[8] = { 1, 1, 1, 1, 0, 0, 0, 1 }; + +/* Decompress a 128-bit 4x4 
pixel texture block compressed using BPTC mode 1. */
+/* Mode 1: two subsets, 6-bit RGB endpoints, one P-bit shared per subset, 3-bit */
+/* indices, no alpha. Writes 16 packed 32-bit pixels to pixel_buffer and always */
+/* returns true. */
+
+static bool DecompressBlockBPTCMode1(detexBlock128 * DETEX_RESTRICT block,
+uint8_t * DETEX_RESTRICT pixel_buffer) {
+    uint64_t data0 = block->data0;
+    uint64_t data1 = block->data1;
+    int partition_set_id = detexGetBits64(data0, 2, 7);
+    // Layout: endpoint[(2 * subset + endpoint_nr) * 3 + component].
+    uint8_t endpoint[2 * 2 * 3]; // 2 subsets.
+    endpoint[0] = detexGetBits64(data0, 8, 13); // red, subset 0, endpoint 0
+    endpoint[3] = detexGetBits64(data0, 14, 19); // red, subset 0, endpoint 1
+    endpoint[6] = detexGetBits64(data0, 20, 25); // red, subset 1, endpoint 0
+    endpoint[9] = detexGetBits64(data0, 26, 31); // red, subset 1, endpoint 1
+    endpoint[1] = detexGetBits64(data0, 32, 37); // green, subset 0, endpoint 0
+    endpoint[4] = detexGetBits64(data0, 38, 43); // green, subset 0, endpoint 1
+    endpoint[7] = detexGetBits64(data0, 44, 49); // green, subset 1, endpoint 0
+    endpoint[10] = detexGetBits64(data0, 50, 55); // green, subset 1, endpoint 1
+    endpoint[2] = detexGetBits64(data0, 56, 61); // blue, subset 0, endpoint 0
+    // blue, subset 0, endpoint 1: two bits from the top of data0, four from data1.
+    // The top two bits are taken with a plain shift; detexGetBits64(data0, 62, 63)
+    // builds its mask with a 64-bit shift by 64, which is undefined behaviour.
+    endpoint[5] = (uint8_t)((data0 >> 62)
+        | (detexGetBits64(data1, 0, 3) << 2));
+    endpoint[8] = detexGetBits64(data1, 4, 9); // blue, subset 1, endpoint 0
+    endpoint[11] = detexGetBits64(data1, 10, 15); // blue, subset 1, endpoint 1
+    // Decode endpoints.
+    for (int i = 0; i < 2 * 2; i++) {
+        //component-wise left-shift
+        endpoint[i * 3 + 0] <<= 2;
+        endpoint[i * 3 + 1] <<= 2;
+        endpoint[i * 3 + 2] <<= 2;
+    }
+    // P-bit is shared.
+    uint8_t pbit_zero = detexGetBits64(data1, 16, 16) << 1;
+    uint8_t pbit_one = detexGetBits64(data1, 17, 17) << 1;
+    // RGB only pbits for mode 1, one for each subset.
+    for (int j = 0; j < 3; j++) {
+        endpoint[0 * 3 + j] |= pbit_zero;
+        endpoint[1 * 3 + j] |= pbit_zero;
+        endpoint[2 * 3 + j] |= pbit_one;
+        endpoint[3 * 3 + j] |= pbit_one;
+    }
+    for (int i = 0; i < 2 * 2; i++) {
+        // Replicate each component's MSB into the LSB.
+        endpoint[i * 3 + 0] |= endpoint[i * 3 + 0] >> 7;
+        endpoint[i * 3 + 1] |= endpoint[i * 3 + 1] >> 7;
+        endpoint[i * 3 + 2] |= endpoint[i * 3 + 2] >> 7;
+    }
+
+    uint8_t subset_index[16];
+    for (int i = 0; i < 16; i++)
+        // subset_index[i] is a number from 0 to 1.
+        subset_index[i] = detex_bptc_table_P2[partition_set_id * 16 + i];
+    uint8_t anchor_index[2];
+    anchor_index[0] = 0;
+    anchor_index[1] = detex_bptc_table_anchor_index_second_subset[partition_set_id];
+    uint8_t color_index[16];
+    // Extract primary index bits (they start at data1 bit 18, right after the P-bits).
+    data1 >>= 18;
+    for (int i = 0; i < 16; i++)
+        if (i == anchor_index[subset_index[i]]) {
+            // Highest bit is zero.
+            color_index[i] = data1 & 3; // Get two bits.
+            data1 >>= 2;
+        }
+        else {
+            color_index[i] = data1 & 7; // Get three bits.
+            data1 >>= 3;
+        }
+    uint32_t *pixel32_buffer = (uint32_t *)pixel_buffer;
+    for (int i = 0; i < 16; i++) {
+        // Pick the two endpoints of the subset this pixel belongs to.
+        uint8_t endpoint_start[3];
+        uint8_t endpoint_end[3];
+        for (int j = 0; j < 3; j++) {
+            endpoint_start[j] = endpoint[2 * subset_index[i] * 3 + j];
+            endpoint_end[j] = endpoint[(2 * subset_index[i] + 1) * 3 + j];
+        }
+        uint32_t output;
+        output = detexPack32R8(Interpolate(endpoint_start[0], endpoint_end[0], color_index[i], 3));
+        output |= detexPack32G8(Interpolate(endpoint_start[1], endpoint_end[1], color_index[i], 3));
+        output |= detexPack32B8(Interpolate(endpoint_start[2], endpoint_end[2], color_index[i], 3));
+        output |= detexPack32A8(0xFF);
+        pixel32_buffer[i] = output;
+    }
+    return true;
+}
+
+/* Decompress a 128-bit 4x4 pixel texture block compressed using the BPTC */
+/* (BC7) format. */
+bool detexDecompressBlockBPTC(const uint8_t * DETEX_RESTRICT bitstring, uint32_t mode_mask,
+uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer) {
+    detexBlock128 block;
+    block.data0 = *(uint64_t *)&bitstring[0];
+    block.data1 = *(uint64_t *)&bitstring[8];
+    block.index = 0;
+    int mode = ExtractMode(&block);
+    if (mode == - 1)
+        return 0;
+    // Allow compression tied to specific modes (according to mode_mask).
+ if (!(mode_mask & ((int)1 << mode))) + return 0; + if (mode >= 4 && (flags & DETEX_DECOMPRESS_FLAG_OPAQUE_ONLY)) + return 0; + if (mode < 4 && (flags & DETEX_DECOMPRESS_FLAG_NON_OPAQUE_ONLY)) + return 0; + if (mode == 1) + return DecompressBlockBPTCMode1(&block, pixel_buffer); + + int nu_subsets = 1; + int partition_set_id = 0; + if (mode_has_partition_bits[mode]) { + nu_subsets = GetNumberOfSubsets(mode); + partition_set_id = ExtractPartitionSetID(&block, mode); + } + int rotation = ExtractRotationBits(&block, mode); + int index_selection_bit = 0; + if (mode == 4) + index_selection_bit = detexBlock128ExtractBits(&block, 1); + + int alpha_index_bitcount = GetAlphaIndexBitcount(mode, index_selection_bit); + int color_index_bitcount = GetColorIndexBitcount(mode, index_selection_bit); + + uint8_t endpoint_array[3 * 2 * 4]; // Max. 3 subsets. + ExtractEndpoints(mode, nu_subsets, &block, endpoint_array); + FullyDecodeEndpoints(endpoint_array, nu_subsets, mode, &block); + + uint8_t subset_index[16]; + for (int i = 0; i < 16; i++) + // subset_index[i] is a number from 0 to 2, or 0 to 1, or 0 depending on the number of subsets. + subset_index[i] = GetPartitionIndex(nu_subsets, partition_set_id, i); + uint8_t anchor_index[4]; // Only need max. 3 elements. + for (int i = 0; i < nu_subsets; i++) + anchor_index[i] = GetAnchorIndex(partition_set_id, i, nu_subsets); + uint8_t color_index[16]; + uint8_t alpha_index[16]; + // Extract primary index bits. + uint64_t data1; + if (block.index >= 64) { + // Because the index bits are all in the second 64-bit word, there is no need to use + // block_extract_bits(). + // This implies the mode is not 4. + data1 = block.data1 >> (block.index - 64); + uint8_t mask1 = (1 << IB[mode]) - 1; + uint8_t mask2 = (1 << (IB[mode] - 1)) - 1; + for (int i = 0; i < 16; i++) + if (i == anchor_index[subset_index[i]]) { + // Highest bit is zero. 
+ color_index[i] = data1 & mask2; + data1 >>= IB[mode] - 1; + alpha_index[i] = color_index[i]; + } + else { + color_index[i] = data1 & mask1; + data1 >>= IB[mode]; + alpha_index[i] = color_index[i]; + } + } + else { // Implies mode 4. + // Because the bits cross the 64-bit word boundary, we have to be careful. + // Block index is 50 at this point. + uint64_t data = block.data0 >> 50; + data |= block.data1 << 14; + for (int i = 0; i < 16; i++) + if (i == anchor_index[subset_index[i]]) { + // Highest bit is zero. + if (index_selection_bit) { // Implies mode == 4. + alpha_index[i] = data & 0x1; + data >>= 1; + } + else { + color_index[i] = data & 0x1; + data >>= 1; + } + } + else { + if (index_selection_bit) { // Implies mode == 4. + alpha_index[i] = data & 0x3; + data >>= 2; + } + else { + color_index[i] = data & 0x3; + data >>= 2; + } + } + // Block index is 81 at this point. + data1 = block.data1 >> (81 - 64); + } + // Extract secondary index bits. + if (IB2[mode] > 0) { + uint8_t mask1 = (1 << IB2[mode]) - 1; + uint8_t mask2 = (1 << (IB2[mode] - 1)) - 1; + for (int i = 0; i < 16; i++) + if (i == anchor_index[subset_index[i]]) { + // Highest bit is zero. 
+ if (index_selection_bit) { + color_index[i] = data1 & 0x3; + data1 >>= 2; + } + else { +// alpha_index[i] = block_extract_bits(&block, IB2[mode] - 1); + alpha_index[i] = data1 & mask2; + data1 >>= IB2[mode] - 1; + } + } + else { + if (index_selection_bit) { + color_index[i] = data1 & 0x7; + data1 >>= 3; + } + else { +// alpha_index[i] = block_extract_bits(&block, IB2[mode]); + alpha_index[i] = data1 & mask1; + data1 >>= IB2[mode]; + } + } + } + + uint32_t *pixel32_buffer = (uint32_t *)pixel_buffer; + for (int i = 0; i < 16; i++) { + uint8_t endpoint_start[4]; + uint8_t endpoint_end[4]; + for (int j = 0; j < 4; j++) { + endpoint_start[j] = endpoint_array[2 * subset_index[i] * 4 + j]; + endpoint_end[j] = endpoint_array[(2 * subset_index[i] + 1) * 4 + j]; + } + + uint32_t output = 0; + output = detexPack32R8(Interpolate(endpoint_start[0], endpoint_end[0], color_index[i], color_index_bitcount)); + output |= detexPack32G8(Interpolate(endpoint_start[1], endpoint_end[1], color_index[i], color_index_bitcount)); + output |= detexPack32B8(Interpolate(endpoint_start[2], endpoint_end[2], color_index[i], color_index_bitcount)); + output |= detexPack32A8(Interpolate(endpoint_start[3], endpoint_end[3], alpha_index[i], alpha_index_bitcount)); + + if (rotation > 0) { + if (rotation == 1) + output = detexPack32RGBA8(detexPixel32GetA8(output), detexPixel32GetG8(output), + detexPixel32GetB8(output), detexPixel32GetR8(output)); + else + if (rotation == 2) + output = detexPack32RGBA8(detexPixel32GetR8(output), detexPixel32GetA8(output), + detexPixel32GetB8(output), detexPixel32GetG8(output)); + else // rotation == 3 + output = detexPack32RGBA8(detexPixel32GetR8(output), detexPixel32GetG8(output), + detexPixel32GetA8(output), detexPixel32GetB8(output)); + } + pixel32_buffer[i] = output; + } + return true; +} + +#if 0 +/* Modify compressed block to use specific colors. For later use. 
*/ +static void SetBlockColors(uint8_t * DETEX_RESTRICT bitstring, uint32_t flags, +uint32_t * DETEX_RESTRICT colors) { + if ((flags & TWO_COLORS) == 0) + return; + uint64_t data0 = *(uint64_t *)&bitstring[0]; + uint64_t data1 = *(uint64_t *)&bitstring[8]; + if ((flags & BPTC_MODE_ALLOWED_ALL) == (1 << 3)) { + // Mode 3, 7 color bits. + // Color bits at index: 10 + // Color bits end before index: 10 + 4 * 3 * 7 = 94 + uint32_t r0 = detexPixel32GetR8(colors[0]); + uint32_t g0 = detexPixel32GetG8(colors[0]); + uint32_t b0 = detexPixel32GetB8(colors[0]); + uint32_t r1 = detexPixel32GetR8(colors[1]); + uint32_t g1 = detexPixel32GetG8(colors[1]); + uint32_t b1 = detexPixel32GetB8(colors[1]); + data0 = detexSetBits64(data0, 10, 16, r0 >> 1); + data0 = detexSetBits64(data0, 17, 23, r0 >> 1); + data0 = detexSetBits64(data0, 24, 30, r1 >> 1); + data0 = detexSetBits64(data0, 31, 37, r1 >> 1); + data0 = detexSetBits64(data0, 38, 44, g0 >> 1); + data0 = detexSetBits64(data0, 45, 51, g0 >> 1); + data0 = detexSetBits64(data0, 52, 58, g1 >> 1); + data0 = detexSetBits64(data0, 59, 63, (g1 >> 1) & 0x1F); + data1 = detexSetBits64(data1, 0, 1, ((g1 >> 1) & 0x60) >> 5); + data1 = detexSetBits64(data1, 2, 8, b0 >> 1); + data1 = detexSetBits64(data1, 9, 15, b0 >> 1); + data1 = detexSetBits64(data1, 16, 22, b1 >> 1); + data1 = detexSetBits64(data1, 23, 29, b1 >> 1); + *(uint64_t *)&bitstring[0] = data0; + *(uint64_t *)&bitstring[8] = data1; +// printf("bptc_set_block_colors: Colors set for mode 3.\n"); + } + else if ((flags & BPTC_MODE_ALLOWED_ALL) == (1 << 5)) { + // Mode 5, 7 color bits, 8 alpha bits. 
+ // Color bits at index: 6 + 2 = 8 + // Alpha bits at index: 8 + 2 * 3 * 7 = 50 + // Alpha bits end before index: 50 + 2 * 8 = 66 + uint32_t r0 = detexPixel32GetR8(colors[0]); + uint32_t g0 = detexPixel32GetG8(colors[0]); + uint32_t b0 = detexPixel32GetB8(colors[0]); + uint32_t r1 = detexPixel32GetR8(colors[1]); + uint32_t g1 = detexPixel32GetG8(colors[1]); + uint32_t b1 = detexPixel32GetB8(colors[1]); + data0 = detexSetBits64(data0, 8, 14, r0 >> 1); + data0 = detexSetBits64(data0, 15, 21, r1 >> 1); + data0 = detexSetBits64(data0, 22, 28, g0 >> 1); + data0 = detexSetBits64(data0, 29, 35, g0 >> 1); + data0 = detexSetBits64(data0, 36, 42, b0 >> 1); + data0 = detexSetBits64(data0, 43, 49, b1 >> 1); + if (flags & (MODES_ALLOWED_PUNCHTHROUGH_ONLY)) { + data0 = detexSetBits64(data0, 50, 57, 0x00); + data0 = detexSetBits64(data0, 58, 63, 0x3F); + data1 = detexSetBits64(data1, 0, 1, 0x3); + } + *(uint64_t *)&bitstring[0] = data0; + *(uint64_t *)&bitstring[8] = data1; +// printf("bptc_set_block_colors: Colors set for mode 5.\n"); + } + else if ((flags & BPTC_MODE_ALLOWED_ALL) == (1 << 6)) { + // Mode 5, 7 color bits, 7 alpha bits. + // Color bits at index 7. 
+ // Alpha bits at index: 7 + 2 * 3 * 7 = 49 + // Alpha bits end before index: 49 + 2 * 7 = 63 + uint32_t r0 = detexPixel32GetR8(colors[0]); + uint32_t g0 = detexPixel32GetG8(colors[0]); + uint32_t b0 = detexPixel32GetB8(colors[0]); + uint32_t r1 = detexPixel32GetR8(colors[1]); + uint32_t g1 = detexPixel32GetG8(colors[1]); + uint32_t b1 = detexPixel32GetB8(colors[1]); + data0 = detexSetBits64(data0, 7, 13, r0 >> 1); + data0 = detexSetBits64(data0, 14, 20, r1 >> 1); + data0 = detexSetBits64(data0, 21, 27, g0 >> 1); + data0 = detexSetBits64(data0, 28, 34, g1 >> 1); + data0 = detexSetBits64(data0, 35, 41, b0 >> 1); + data0 = detexSetBits64(data0, 42, 48, b1 >> 1); + if (flags & (MODES_ALLOWED_PUNCHTHROUGH_ONLY)) { + data0 = detexSetBits64(data0, 49, 55, 0x00); + data0 = detexSetBits64(data0, 56, 62, 0x7F); + } + *(uint64_t *)&bitstring[0] = data0; +// printf("bptc_set_block_colors: Colors set for mode 6.\n"); + } +} +#endif + +/* Return the internal mode of the BPTC block. */ +uint32_t detexGetModeBPTC(const uint8_t *bitstring) { + detexBlock128 block; + block.data0 = *(uint64_t *)&bitstring[0]; + block.data1 = *(uint64_t *)&bitstring[8]; + block.index = 0; + int mode = ExtractMode(&block); + return mode; +} + +void detexSetModeBPTC(uint8_t *bitstring, uint32_t mode, uint32_t flags, +uint32_t *colors) { + // Mode 0 starts with 1 + // Mode 1 starts with 01 + // ... 
+ // Mode 7 starts with 00000001 + int bit = 0x1 << mode; + bitstring[0] &= ~(bit - 1); + bitstring[0] |= bit; + return; +} + diff --git a/Source/ThirdParty/detex/decompress-rgtc.cpp b/Source/ThirdParty/detex/decompress-rgtc.cpp new file mode 100644 index 000000000..6403e814d --- /dev/null +++ b/Source/ThirdParty/detex/decompress-rgtc.cpp @@ -0,0 +1,148 @@ +/* + +Copyright (c) 2015 Harm Hanemaaijer + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +*/ + +#include "detex.h" + +// For each pixel, decode an 8-bit integer and store as follows: +// If shift and offset are zero, store each value in consecutive 8 bit values in pixel_buffer. +// If shift is one, store each value in consecutive 16-bit words in pixel_buffer; if offset +// is zero, store it in the first 8 bits, if offset is one store it in the last 8 bits of each +// 16-bit word. +static DETEX_INLINE_ONLY void DecodeBlockRGTC(const uint8_t * DETEX_RESTRICT bitstring, int shift, +int offset, uint8_t * DETEX_RESTRICT pixel_buffer) { + // LSBFirst byte order only. 
+ uint64_t bits = (*(uint64_t *)&bitstring[0]) >> 16; + int lum0 = bitstring[0]; + int lum1 = bitstring[1]; + for (int i = 0; i < 16; i++) { + int control_code = bits & 0x7; + uint8_t output; + if (lum0 > lum1) + switch (control_code) { + case 0 : output = lum0; break; + case 1 : output = lum1; break; + case 2 : output = detexDivide0To1791By7(6 * lum0 + lum1); break; + case 3 : output = detexDivide0To1791By7(5 * lum0 + 2 * lum1); break; + case 4 : output = detexDivide0To1791By7(4 * lum0 + 3 * lum1); break; + case 5 : output = detexDivide0To1791By7(3 * lum0 + 4 * lum1); break; + case 6 : output = detexDivide0To1791By7(2 * lum0 + 5 * lum1); break; + case 7 : output = detexDivide0To1791By7(lum0 + 6 * lum1); break; + } + else + switch (control_code) { + case 0 : output = lum0; break; + case 1 : output = lum1; break; + case 2 : output = detexDivide0To1279By5(4 * lum0 + lum1); break; + case 3 : output = detexDivide0To1279By5(3 * lum0 + 2 * lum1); break; + case 4 : output = detexDivide0To1279By5(2 * lum0 + 3 * lum1); break; + case 5 : output = detexDivide0To1279By5(lum0 + 4 * lum1); break; + case 6 : output = 0; break; + case 7 : output = 0xFF; break; + } + pixel_buffer[(i << shift) + offset] = output; + bits >>= 3; + } +} + +/* Decompress a 64-bit 4x4 pixel texture block compressed using the */ +/* unsigned RGTC1 (BC4) format. */ +bool detexDecompressBlockRGTC1(const uint8_t * DETEX_RESTRICT bitstring, uint32_t mode_mask, +uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer) { + DecodeBlockRGTC(bitstring, 0, 0, pixel_buffer); + return true; +} + +/* Decompress a 128-bit 4x4 pixel texture block compressed using the */ +/* unsigned RGTC2 (BC5) format. 
*/ +bool detexDecompressBlockRGTC2(const uint8_t * DETEX_RESTRICT bitstring, uint32_t mode_mask, +uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer) { + DecodeBlockRGTC(bitstring, 1, 0, pixel_buffer); + DecodeBlockRGTC(&bitstring[8], 1, 1, pixel_buffer); + return true; +} + +// For each pixel, decode an 16-bit integer and store as follows: +// If shift and offset are zero, store each value in consecutive 16 bit values in pixel_buffer. +// If shift is one, store each value in consecutive 32-bit words in pixel_buffer; if offset +// is zero, store it in the first 16 bits, if offset is one store it in the last 16 bits of each +// 32-bit word. Returns true if the compressed block is valid. +static DETEX_INLINE_ONLY bool DecodeBlockSignedRGTC(const uint8_t * DETEX_RESTRICT bitstring, int shift, +int offset, uint8_t * DETEX_RESTRICT pixel_buffer) { + // LSBFirst byte order only. + uint64_t bits = (*(uint64_t *)&bitstring[0]) >> 16; + int lum0 = (int8_t)bitstring[0]; + int lum1 = (int8_t)bitstring[1]; + if (lum0 == - 127 && lum1 == - 128) + // Not allowed. + return false; + if (lum0 == - 128) + lum0 = - 127; + if (lum1 == - 128) + lum1 = - 127; + // Note: values are mapped to a red value of -127 to 127. 
+ uint16_t *pixel16_buffer = (uint16_t *)pixel_buffer; + for (int i = 0; i < 16; i++) { + int control_code = bits & 0x7; + int32_t result; + if (lum0 > lum1) + switch (control_code) { + case 0 : result = lum0; break; + case 1 : result = lum1; break; + case 2 : result = detexDivideMinus895To895By7(6 * lum0 + lum1); break; + case 3 : result = detexDivideMinus895To895By7(5 * lum0 + 2 * lum1); break; + case 4 : result = detexDivideMinus895To895By7(4 * lum0 + 3 * lum1); break; + case 5 : result = detexDivideMinus895To895By7(3 * lum0 + 4 * lum1); break; + case 6 : result = detexDivideMinus895To895By7(2 * lum0 + 5 * lum1); break; + case 7 : result = detexDivideMinus895To895By7(lum0 + 6 * lum1); break; + } + else + switch (control_code) { + case 0 : result = lum0; break; + case 1 : result = lum1; break; + case 2 : result = detexDivideMinus639To639By5(4 * lum0 + lum1); break; + case 3 : result = detexDivideMinus639To639By5(3 * lum0 + 2 * lum1); break; + case 4 : result = detexDivideMinus639To639By5(2 * lum0 + 3 * lum1); break; + case 5 : result = detexDivideMinus639To639By5(lum0 + 4 * lum1); break; + case 6 : result = - 127; break; + case 7 : result = 127; break; + } + // Map from [-127, 127] to [-32768, 32767]. + pixel16_buffer[(i << shift) + offset] = (uint16_t)(int16_t) + ((result + 127) * 65535 / 254 - 32768); + bits >>= 3; + } + return true; +} + +/* Decompress a 64-bit 4x4 pixel texture block compressed using the */ +/* signed RGTC1 (signed BC4) format. */ +bool detexDecompressBlockSIGNED_RGTC1(const uint8_t * DETEX_RESTRICT bitstring, uint32_t mode_mask, +uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer) { + return DecodeBlockSignedRGTC(bitstring, 0, 0, pixel_buffer); +} + +/* Decompress a 128-bit 4x4 pixel texture block compressed using the */ +/* signed RGTC2 (signed BC5) format. 
*/ +bool detexDecompressBlockSIGNED_RGTC2(const uint8_t * DETEX_RESTRICT bitstring, uint32_t mode_mask, +uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer) { + bool r = DecodeBlockSignedRGTC(bitstring, 1, 0, pixel_buffer); + if (!r) + return false; + return DecodeBlockSignedRGTC(&bitstring[8], 1, 1, pixel_buffer); +} + diff --git a/Source/ThirdParty/detex/detex.h b/Source/ThirdParty/detex/detex.h index c52566679..adbeac512 100644 --- a/Source/ThirdParty/detex/detex.h +++ b/Source/ThirdParty/detex/detex.h @@ -34,7 +34,6 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define DETEX_HELPER_SHARED_IMPORT __declspec(dllimport) #define DETEX_HELPER_SHARED_EXPORT __declspec(dllexport) #define DETEX_HELPER_SHARED_LOCAL - #define DETEX_INLINE_ONLY __forceinline #else #if __GNUC__ >= 4 #define DETEX_HELPER_SHARED_IMPORT __attribute__ ((visibility ("default"))) @@ -45,7 +44,6 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define DETEX_HELPER_SHARED_EXPORT #define DETEX_HELPER_SHARED_LOCAL #endif - #define DETEX_INLINE_ONLY __attribute__((always_inline)) inline #endif /* Now we use the generic helper definitions above to define DETEX_API and DETEX_LOCAL. */ @@ -74,6 +72,7 @@ __BEGIN_DECLS #include #include +#define DETEX_INLINE_ONLY __attribute__((always_inline)) inline #define DETEX_RESTRICT __restrict /* Maximum uncompressed block size in bytes. */