diff --git a/.gitattributes b/.gitattributes
index f7bd4d06..80c19213 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -126,6 +126,7 @@
*.dds filter=lfs diff=lfs merge=lfs -text
*.ktx filter=lfs diff=lfs merge=lfs -text
*.ktx2 filter=lfs diff=lfs merge=lfs -text
+*.astc filter=lfs diff=lfs merge=lfs -text
*.pam filter=lfs diff=lfs merge=lfs -text
*.pbm filter=lfs diff=lfs merge=lfs -text
*.pgm filter=lfs diff=lfs merge=lfs -text
diff --git a/ImageSharp.Textures.sln b/ImageSharp.Textures.sln
index 636514f8..7568b07c 100644
--- a/ImageSharp.Textures.sln
+++ b/ImageSharp.Textures.sln
@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.29613.14
+# Visual Studio Version 18
+VisualStudioVersion = 18.5.11716.220
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ImageSharp.Textures", "src\ImageSharp.Textures\ImageSharp.Textures.csproj", "{1588F6C4-2186-4A35-9693-E9F296791393}"
EndProject
@@ -50,13 +50,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{
EndProjectSection
EndProject
Global
- GlobalSection(SharedMSBuildProjectFiles) = preSolution
- shared-infrastructure\src\SharedInfrastructure\SharedInfrastructure.projitems*{1588f6c4-2186-4a35-9693-e9f296791393}*SharedItemsImports = 5
- tests\Images\Images.projitems*{17fcbd4d-d232-45e8-876f-dfbc2fad52cf}*SharedItemsImports = 5
- tests\Images\Images.projitems*{18be79b6-6b95-4ed7-a963-ad75f6cb9f3c}*SharedItemsImports = 5
- tests\Images\Images.projitems*{68a8cc40-6aed-4e96-b524-31b1158fdeea}*SharedItemsImports = 13
- tests\Images\Images.projitems*{b159ffd1-e646-42d0-892c-4abf69103712}*SharedItemsImports = 5
- EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
@@ -94,4 +87,11 @@ Global
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {F1762A0D-74C4-454A-BCB7-C010BB067E58}
EndGlobalSection
+ GlobalSection(SharedMSBuildProjectFiles) = preSolution
+ shared-infrastructure\src\SharedInfrastructure\SharedInfrastructure.projitems*{1588f6c4-2186-4a35-9693-e9f296791393}*SharedItemsImports = 5
+ tests\Images\Images.projitems*{17fcbd4d-d232-45e8-876f-dfbc2fad52cf}*SharedItemsImports = 5
+ tests\Images\Images.projitems*{18be79b6-6b95-4ed7-a963-ad75f6cb9f3c}*SharedItemsImports = 5
+ tests\Images\Images.projitems*{68a8cc40-6aed-4e96-b524-31b1158fdeea}*SharedItemsImports = 13
+ tests\Images\Images.projitems*{b159ffd1-e646-42d0-892c-4abf69103712}*SharedItemsImports = 5
+ EndGlobalSection
EndGlobal
diff --git a/src/Directory.Build.props b/src/Directory.Build.props
index 2813cc4b..aa98dc2e 100644
--- a/src/Directory.Build.props
+++ b/src/Directory.Build.props
@@ -11,7 +11,7 @@
-->
-
+
@@ -22,7 +22,6 @@
-
diff --git a/src/ImageSharp.Textures/Compression/Astc/AstcDecoder.cs b/src/ImageSharp.Textures/Compression/Astc/AstcDecoder.cs
new file mode 100644
index 00000000..a6750e0f
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/AstcDecoder.cs
@@ -0,0 +1,541 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Buffers;
+using System.Buffers.Binary;
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Memory;
+using SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+using SixLabors.ImageSharp.Textures.Compression.Astc.IO;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc;
+
+/// <summary>
+/// Provides methods to decode ASTC-compressed texture data into uncompressed pixel formats.
+/// </summary>
+/// <remarks>
+/// The decoder returns raw decoded values and does not apply any gamma or color-space
+/// transform. Callers loading ASTC data from an sRGB-tagged container (e.g. a KTX file
+/// with an *_SRGB_BLOCK format) are responsible for applying sRGB-to-linear conversion
+/// downstream if they need linear values.
+/// </remarks>
+public static class AstcDecoder
+{
+ /// <summary>
+ /// Decompresses ASTC-compressed data to uncompressed RGBA32 format (4 bytes per pixel).
+ /// </summary>
+ /// <param name="astcData">The ASTC-compressed texture data</param>
+ /// <param name="width">Image width in pixels</param>
+ /// <param name="height">Image height in pixels</param>
+ /// <param name="footprint">The ASTC block footprint (e.g., 4x4, 5x5)</param>
+ /// <returns>
+ /// Array of bytes in RGBA32 format (width * height * 4 bytes total), or an empty span if the
+ /// input is structurally invalid. Illegal blocks do not fail the call: the decode loop fills them with the error colour (magenta).
+ /// </returns>
+ public static Span DecompressImage(ReadOnlySpan astcData, int width, int height, Footprint footprint)
+ {
+ Guard.MustBeGreaterThan(width, 0, nameof(width));
+ Guard.MustBeGreaterThan(height, 0, nameof(height));
+
+ long totalPixels = (long)width * height; // widened so the product itself cannot overflow
+ Guard.MustBeLessThanOrEqualTo(totalPixels, (long)int.MaxValue / BlockInfo.ChannelsPerPixel, nameof(totalPixels));
+
+ int totalBytes = (int)(totalPixels * BlockInfo.ChannelsPerPixel);
+ byte[] imageBuffer = new byte[totalBytes];
+
+ return DecompressImage(astcData, width, height, footprint, imageBuffer)
+ ? imageBuffer
+ : [];
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data to uncompressed RGBA32 format into a caller-provided buffer.
+ /// </summary>
+ /// <param name="astcData">The ASTC-compressed texture data</param>
+ /// <param name="width">Image width in pixels</param>
+ /// <param name="height">Image height in pixels</param>
+ /// <param name="footprint">The ASTC block footprint (e.g., 4x4, 5x5)</param>
+ /// <param name="imageBuffer">Output buffer. Must be at least width * height * 4 bytes.</param>
+ /// <returns>
+ /// True if the input was structurally valid and decoding ran, false if it was rejected
+ /// up front. Illegal blocks do not fail the call: the decode loop fills them with the error colour (magenta).
+ /// </returns>
+ public static bool DecompressImage(ReadOnlySpan astcData, int width, int height, Footprint footprint, Span imageBuffer)
+ {
+ ValidateImageArgs(width, height, imageBuffer.Length, BlockInfo.ChannelsPerPixel);
+
+ if (!TryGetBlockLayout(astcData, width, height, footprint, out int blocksWide, out int blocksHigh))
+ {
+ return false;
+ }
+
+ using IMemoryOwner decodedBlock = MemoryAllocator.Default.Allocate(footprint.PixelCount * BlockInfo.ChannelsPerPixel); // one-block scratch area shared by every block
+ DecodeAllBlocks(astcData, width, height, footprint, blocksWide, blocksHigh, imageBuffer, decodedBlock.Memory.Span);
+ return true;
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data read from a stream to uncompressed RGBA32 format.
+ /// Reads exactly the bytes implied by <paramref name="width"/>, <paramref name="height"/>,
+ /// and <paramref name="footprint"/>.
+ /// </summary>
+ /// <param name="stream">The stream containing ASTC-compressed block data.</param>
+ /// <param name="width">Image width in pixels.</param>
+ /// <param name="height">Image height in pixels.</param>
+ /// <param name="footprint">The ASTC block footprint (e.g., 4x4, 5x5).</param>
+ /// <returns>
+ /// Array of bytes in RGBA32 format (width * height * 4 bytes total). The stream's read
+ /// position advances by the consumed block bytes.
+ /// </returns>
+ /// <exception cref="EndOfStreamException">
+ /// Thrown if the stream contains fewer bytes than the footprint requires.
+ /// </exception>
+ public static Span DecompressImage(Stream stream, int width, int height, Footprint footprint)
+ {
+ Guard.NotNull(stream);
+ Guard.MustBeGreaterThan(width, 0, nameof(width));
+ Guard.MustBeGreaterThan(height, 0, nameof(height));
+
+ long totalPixels = (long)width * height; // widened so the product itself cannot overflow
+ Guard.MustBeLessThanOrEqualTo(totalPixels, (long)int.MaxValue / BlockInfo.ChannelsPerPixel, nameof(totalPixels));
+
+ byte[] imageBuffer = new byte[(int)(totalPixels * BlockInfo.ChannelsPerPixel)];
+ return DecompressImage(stream, width, height, footprint, imageBuffer)
+ ? imageBuffer
+ : [];
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data read from a stream into a caller-provided buffer.
+ /// </summary>
+ /// <param name="stream">The stream containing ASTC-compressed block data.</param>
+ /// <param name="width">Image width in pixels.</param>
+ /// <param name="height">Image height in pixels.</param>
+ /// <param name="footprint">The ASTC block footprint.</param>
+ /// <param name="imageBuffer">Output buffer. Must be at least width * height * 4 bytes.</param>
+ /// <returns>
+ /// True if the stream contained the expected block count and decoding ran. The stream's
+ /// read position advances by the consumed block bytes.
+ /// </returns>
+ /// <exception cref="EndOfStreamException">
+ /// Thrown if the stream contains fewer bytes than the footprint requires.
+ /// </exception>
+ public static bool DecompressImage(Stream stream, int width, int height, Footprint footprint, Span imageBuffer)
+ {
+ Guard.NotNull(stream);
+ ValidateImageArgs(width, height, imageBuffer.Length, BlockInfo.ChannelsPerPixel);
+
+ int expectedBytes = ComputeExpectedBlockStreamSize(width, height, footprint);
+ using IMemoryOwner blocks = MemoryAllocator.Default.Allocate(expectedBytes);
+ Span blockSpan = blocks.Memory.Span[..expectedBytes]; // trimmed to the exact size; presumably the allocator may return a larger buffer (confirm)
+ stream.ReadExactly(blockSpan); // the whole block payload is buffered before any decoding starts
+
+ return DecompressImage((ReadOnlySpan)blockSpan, width, height, footprint, imageBuffer);
+ }
+
+ /// <summary>
+ /// Shared image-decode loop for both LDR and HDR profiles (ASTC spec §C.2.7 decode
+ /// procedure, §C.2.5 LDR/HDR modes). Iterates
+ /// the compressed block array in raster order, parses each block via
+ /// <see cref="BlockModeDecoder"/>, runs the pipeline's profile check, and dispatches to
+ /// the appropriate per-block decoder.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void DecodeAllBlocks(
+ ReadOnlySpan astcData,
+ int width,
+ int height,
+ Footprint footprint,
+ int blocksWide,
+ int blocksHigh,
+ Span imageBuffer,
+ Span decodedPixels)
+ where TPipeline : struct, IBlockPipeline
+ where T : unmanaged
+ {
+ TPipeline pipeline = default;
+ int blockIndex = 0;
+
+ for (int blockY = 0; blockY < blocksHigh; blockY++)
+ {
+ for (int blockX = 0; blockX < blocksWide; blockX++)
+ {
+ int index = blockIndex++; // blocks are consumed in row-major order, so a running counter addresses them
+ UInt128 blockBits = ReadBlockBits(astcData, index);
+
+ BlockInfo info = BlockModeDecoder.Decode(blockBits);
+ BlockDestination dest = ComputeBlockDestination(blockX, blockY, footprint, width, height);
+
+ // Spec §C.2.19, §C.2.24, §C.2.25: illegal block encodings, and HDR endpoint modes
+ // in the LDR profile, must produce the error colour (magenta) for every texel.
+ if (!info.IsValid || !pipeline.IsBlockLegal(in info))
+ {
+ pipeline.WriteErrorColorClipped(
+ footprint, dest.DstBaseX, dest.DstBaseY, dest.CopyWidth, dest.CopyHeight, width, imageBuffer);
+ continue;
+ }
+
+ DecodeBlock(blockBits, in info, footprint, dest, width, imageBuffer, decodedPixels);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Routes a single block to the best available path. Single-partition, single-plane,
+ /// non-void-extent blocks (the common shape per ASTC spec §C.2.10, §C.2.20, §C.2.23) take
+ /// the fused fast path — directly to the image buffer when the block fits entirely inside
+ /// the image, or to a scratch buffer at image edges that need cropping. Everything else
+ /// (void-extent, multi-partition, dual-plane) falls through to the general
+ /// <c>LogicalWrite</c> pipeline.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void DecodeBlock(
+ UInt128 blockBits,
+ in BlockInfo info,
+ Footprint footprint,
+ BlockDestination dest,
+ int imageWidth,
+ Span imageBuffer,
+ Span decodedPixels)
+ where TPipeline : struct, IBlockPipeline
+ where T : unmanaged
+ {
+ TPipeline pipeline = default;
+
+ if (info.IsFusable && dest.IsFullInteriorBlock)
+ {
+ pipeline.FusedToImage(blockBits, in info, footprint, dest.DstBaseX, dest.DstBaseY, imageWidth, imageBuffer);
+ return;
+ }
+
+ if (info.IsFusable)
+ {
+ pipeline.FusedToScratch(blockBits, in info, footprint, decodedPixels);
+ }
+ else
+ {
+ pipeline.LogicalWrite(blockBits, in info, footprint, decodedPixels);
+ }
+
+ CopyBlockRect(decodedPixels, imageBuffer, footprint.Width, dest.CopyWidth, dest.CopyHeight, dest.DstBaseX, dest.DstBaseY, imageWidth); // both scratch paths end with a clipped copy into the image
+ }
+
+ /// <summary>
+ /// Shared single-block decode path for the public DecompressBlock entry points.
+ /// Runs the pipeline's profile check (LDR rejects HDR content per ASTC spec §C.2.19),
+ /// then dispatches to the fused fast path for the common shape (single-partition,
+ /// single-plane, non-void-extent — spec §C.2.10, §C.2.20, §C.2.23) or the general
+ /// <c>LogicalWrite</c> pipeline otherwise. The caller's <paramref name="buffer"/>
+ /// is sized for exactly one block, so there's no interior/edge distinction.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void DecodeSingleBlock(ReadOnlySpan blockData, Footprint footprint, Span buffer)
+ where TPipeline : struct, IBlockPipeline
+ where T : unmanaged
+ {
+ UInt128 blockBits = BinaryPrimitives.ReadUInt128LittleEndian(blockData);
+ BlockInfo info = BlockModeDecoder.Decode(blockBits);
+ TPipeline pipeline = default;
+
+ // Spec §C.2.19, §C.2.24, §C.2.25: illegal blocks and HDR-in-LDR emit magenta.
+ if (!info.IsValid || !pipeline.IsBlockLegal(in info))
+ {
+ pipeline.WriteErrorColor(footprint, buffer);
+ return;
+ }
+
+ if (info.IsFusable)
+ {
+ pipeline.FusedToScratch(blockBits, in info, footprint, buffer);
+ return;
+ }
+
+ pipeline.LogicalWrite(blockBits, in info, footprint, buffer);
+ }
+
+ /// <summary>
+ /// Decompresses a single ASTC block to RGBA32 pixel data
+ /// </summary>
+ /// <param name="blockData">The data to decode; must hold at least one block (<c>BlockInfo.SizeInBytes</c> bytes)</param>
+ /// <param name="footprint">The type of ASTC block footprint e.g. 4x4, 5x5, etc.</param>
+ /// <param name="buffer">The buffer to write the decoded pixels into; must hold at least footprint.PixelCount * 4 elements</param>
+ public static void DecompressBlock(ReadOnlySpan blockData, Footprint footprint, Span buffer)
+ {
+ Guard.MustBeSizedAtLeast(blockData, BlockInfo.SizeInBytes, nameof(blockData));
+ Guard.MustBeSizedAtLeast(buffer, footprint.PixelCount * BlockInfo.ChannelsPerPixel, nameof(buffer));
+
+ DecodeSingleBlock(blockData, footprint, buffer);
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data to RGBA float values in a newly allocated buffer.
+ /// </summary>
+ /// <param name="astcData">The ASTC-compressed texture data</param>
+ /// <param name="width">Image width in pixels</param>
+ /// <param name="height">Image height in pixels</param>
+ /// <param name="footprint">The ASTC block footprint (e.g., 4x4, 5x5)</param>
+ /// <returns>
+ /// Values in RGBA order (width * height * 4 elements), or an empty span if the input is structurally invalid. For HDR content, values may exceed 1.0.
+ /// </returns>
+ public static Span DecompressHdrImage(ReadOnlySpan astcData, int width, int height, Footprint footprint)
+ {
+ Guard.MustBeGreaterThan(width, 0, nameof(width));
+ Guard.MustBeGreaterThan(height, 0, nameof(height));
+
+ long totalPixels = (long)width * height; // widened so the product itself cannot overflow
+ Guard.MustBeLessThanOrEqualTo(totalPixels, (long)int.MaxValue / BlockInfo.ChannelsPerPixel, nameof(totalPixels));
+
+ int totalFloats = (int)(totalPixels * BlockInfo.ChannelsPerPixel); // same channel constant as the stream overload, not a bare 4
+ float[] imageBuffer = new float[totalFloats];
+ if (!DecompressHdrImage(astcData, width, height, footprint, imageBuffer))
+ {
+ return [];
+ }
+
+ return imageBuffer;
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data to RGBA float values into a caller-provided buffer.
+ /// </summary>
+ /// <param name="astcData">The ASTC-compressed texture data</param>
+ /// <param name="width">Image width in pixels</param>
+ /// <param name="height">Image height in pixels</param>
+ /// <param name="footprint">The ASTC block footprint (e.g., 4x4, 5x5)</param>
+ /// <param name="imageBuffer">Output buffer. Must be at least width * height * 4 floats.</param>
+ /// <returns>
+ /// True if the input was structurally valid and decoding ran, false if it was rejected
+ /// up front. Illegal blocks do not fail the call: the decode loop fills them with the error colour.
+ /// </returns>
+ public static bool DecompressHdrImage(ReadOnlySpan astcData, int width, int height, Footprint footprint, Span imageBuffer)
+ {
+ ValidateImageArgs(width, height, imageBuffer.Length, BlockInfo.ChannelsPerPixel);
+
+ if (!TryGetBlockLayout(astcData, width, height, footprint, out int blocksWide, out int blocksHigh))
+ {
+ return false;
+ }
+
+ using IMemoryOwner decodedBlock = MemoryAllocator.Default.Allocate(footprint.PixelCount * BlockInfo.ChannelsPerPixel); // one-block scratch area shared by every block
+ DecodeAllBlocks(
+ astcData, width, height, footprint, blocksWide, blocksHigh, imageBuffer, decodedBlock.Memory.Span);
+ return true;
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data read from a stream to RGBA float values.
+ /// </summary>
+ /// <param name="stream">The stream containing ASTC-compressed block data.</param>
+ /// <param name="width">Image width in pixels.</param>
+ /// <param name="height">Image height in pixels.</param>
+ /// <param name="footprint">The ASTC block footprint.</param>
+ /// <returns>
+ /// Values in RGBA order. For HDR content, values may exceed 1.0. The stream's read position
+ /// advances by the consumed block bytes.
+ /// </returns>
+ /// <exception cref="EndOfStreamException">
+ /// Thrown if the stream contains fewer bytes than the footprint requires.
+ /// </exception>
+ public static Span DecompressHdrImage(Stream stream, int width, int height, Footprint footprint)
+ {
+ Guard.NotNull(stream);
+ Guard.MustBeGreaterThan(width, 0, nameof(width));
+ Guard.MustBeGreaterThan(height, 0, nameof(height));
+
+ long totalPixels = (long)width * height; // widened so the product itself cannot overflow
+ Guard.MustBeLessThanOrEqualTo(totalPixels, (long)int.MaxValue / BlockInfo.ChannelsPerPixel, nameof(totalPixels));
+
+ float[] imageBuffer = new float[(int)(totalPixels * BlockInfo.ChannelsPerPixel)];
+ return DecompressHdrImage(stream, width, height, footprint, imageBuffer)
+ ? imageBuffer
+ : [];
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data read from a stream into a caller-provided HDR buffer.
+ /// </summary>
+ /// <param name="stream">The stream containing ASTC-compressed block data.</param>
+ /// <param name="width">Image width in pixels.</param>
+ /// <param name="height">Image height in pixels.</param>
+ /// <param name="footprint">The ASTC block footprint.</param>
+ /// <param name="imageBuffer">Output buffer. Must be at least width * height * 4 floats.</param>
+ /// <returns>
+ /// True if the stream contained the expected block count and decoding ran. The stream's
+ /// read position advances by the consumed block bytes.
+ /// </returns>
+ /// <exception cref="EndOfStreamException">
+ /// Thrown if the stream contains fewer bytes than the footprint requires.
+ /// </exception>
+ public static bool DecompressHdrImage(Stream stream, int width, int height, Footprint footprint, Span imageBuffer)
+ {
+ Guard.NotNull(stream);
+ ValidateImageArgs(width, height, imageBuffer.Length, BlockInfo.ChannelsPerPixel);
+
+ int expectedBytes = ComputeExpectedBlockStreamSize(width, height, footprint);
+ using IMemoryOwner blocks = MemoryAllocator.Default.Allocate(expectedBytes);
+ Span blockSpan = blocks.Memory.Span[..expectedBytes]; // trimmed to the exact size; presumably the allocator may return a larger buffer (confirm)
+ stream.ReadExactly(blockSpan); // the whole block payload is buffered before any decoding starts
+
+ return DecompressHdrImage((ReadOnlySpan)blockSpan, width, height, footprint, imageBuffer);
+ }
+
+ /// <summary>
+ /// Decompresses ASTC-compressed data to RGBA values, taking the footprint as a <see cref="FootprintType"/>.
+ /// </summary>
+ /// <param name="astcData">The ASTC-compressed texture data</param>
+ /// <param name="width">Image width in pixels</param>
+ /// <param name="height">Image height in pixels</param>
+ /// <param name="footprint">The ASTC block footprint type</param>
+ /// <returns>
+ /// Values in RGBA order. For HDR content, values may exceed 1.0.
+ /// </returns>
+ public static Span DecompressHdrImage(ReadOnlySpan astcData, int width, int height, FootprintType footprint)
+ {
+ Footprint footPrint = Footprint.FromFootprintType(footprint); // resolve the enum, then defer to the Footprint overload
+ return DecompressHdrImage(astcData, width, height, footPrint);
+ }
+
+ /// <summary>
+ /// Decompresses a single ASTC block to float RGBA values.
+ /// </summary>
+ /// <param name="blockData">The 16-byte ASTC block to decode</param>
+ /// <param name="footprint">The ASTC block footprint</param>
+ /// <param name="buffer">The buffer to write decoded values into (must be at least footprint.Width * footprint.Height * 4 elements)</param>
+ public static void DecompressHdrBlock(ReadOnlySpan blockData, Footprint footprint, Span buffer)
+ {
+ Guard.MustBeSizedAtLeast(blockData, BlockInfo.SizeInBytes, nameof(blockData));
+ Guard.MustBeSizedAtLeast(buffer, footprint.PixelCount * BlockInfo.ChannelsPerPixel, nameof(buffer));
+
+ DecodeSingleBlock(blockData, footprint, buffer);
+ }
+
+ internal static Span DecompressImage(AstcFile file) // RGBA32 decode of a parsed AstcFile
+ {
+ Guard.NotNull(file);
+
+ return DecompressImage(file.Blocks, file.Width, file.Height, file.Footprint); // forwards the file's own blocks, dimensions and footprint
+ }
+
+ internal static Span DecompressImage(ReadOnlySpan astcData, int width, int height, FootprintType footprint) // RGBA32 decode keyed by FootprintType
+ {
+ Footprint footPrint = Footprint.FromFootprintType(footprint); // resolve the enum, then defer to the Footprint overload
+
+ return DecompressImage(astcData, width, height, footPrint);
+ }
+
+ private static bool TryGetBlockLayout( // computes the block grid and checks the payload holds exactly that many blocks
+ ReadOnlySpan astcData,
+ int width,
+ int height,
+ Footprint footprint,
+ out int blocksWide,
+ out int blocksHigh)
+ {
+ int blockWidth = footprint.Width;
+ int blockHeight = footprint.Height;
+ blocksWide = 0; // out parameters stay zero on every failure path
+ blocksHigh = 0;
+
+ if (blockWidth <= 0 || blockHeight <= 0 || width <= 0 || height <= 0)
+ {
+ return false;
+ }
+
+ blocksWide = (width + blockWidth - 1) / blockWidth; // ceiling division: a partial block at the edge still counts
+ blocksHigh = (height + blockHeight - 1) / blockHeight;
+
+ // Widen to long so the block-count product cannot overflow; the payload must be a whole number of blocks and match it exactly.
+ long expectedBlockCount = (long)blocksWide * blocksHigh;
+ if (astcData.Length % BlockInfo.SizeInBytes != 0 || astcData.Length / BlockInfo.SizeInBytes != expectedBlockCount)
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ /// <summary>
+ /// Validates that <paramref name="width"/> and <paramref name="height"/> are positive,
+ /// that width × height × <paramref name="bytesPerPixel"/> does not overflow
+ /// <see cref="int"/>, and that <paramref name="bufferLength"/> has room for
+ /// the decoded output. Callers in this class pass the channel count, so for float buffers the unit is elements rather than bytes.
+ /// </summary>
+ private static void ValidateImageArgs(int width, int height, int bufferLength, int bytesPerPixel)
+ {
+ Guard.MustBeGreaterThan(width, 0, nameof(width));
+ Guard.MustBeGreaterThan(height, 0, nameof(height));
+
+ long totalPixels = (long)width * height; // widened so the product itself cannot overflow
+ Guard.MustBeLessThanOrEqualTo(totalPixels, (long)int.MaxValue / bytesPerPixel, nameof(totalPixels));
+
+ long totalElements = totalPixels * bytesPerPixel;
+ Guard.MustBeGreaterThanOrEqualTo(bufferLength, totalElements, nameof(bufferLength));
+ }
+
+ /// <summary>
+ /// Returns the total ASTC block-stream byte size for the given image dimensions and footprint: ceil(width / blockWidth) * ceil(height / blockHeight) * 16.
+ /// </summary>
+ /// <exception cref="OverflowException">Thrown if the byte size does not fit in an <see cref="int"/>.</exception>
+ private static int ComputeExpectedBlockStreamSize(int width, int height, Footprint footprint)
+ {
+ int blocksWide = (width + footprint.Width - 1) / footprint.Width;
+ int blocksHigh = (height + footprint.Height - 1) / footprint.Height;
+ return checked(blocksWide * blocksHigh * BlockInfo.SizeInBytes); // a 1-pixel-wide, very tall image can exceed int.MaxValue here; fail loudly instead of wrapping
+ }
+
+ /// <summary>
+ /// Reads the 16 bytes of the ASTC block at <paramref name="blockIndex"/> into a
+ /// <see cref="UInt128"/> (little-endian). The caller is responsible for ensuring the
+ /// stream contains the requested block — <see cref="TryGetBlockLayout"/> verifies
+ /// astcData.Length matches the expected block count before iteration begins.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static UInt128 ReadBlockBits(ReadOnlySpan astcData, int blockIndex)
+ {
+ int offset = blockIndex * BlockInfo.SizeInBytes;
+ return BinaryPrimitives.ReadUInt128LittleEndian(astcData.Slice(offset, BlockInfo.SizeInBytes));
+ }
+
+ /// <summary>
+ /// Computes the destination rectangle for the block at (<paramref name="blockX"/>,
+ /// <paramref name="blockY"/>) given the image bounds, clipping the footprint extents
+ /// to fit inside the image.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static BlockDestination ComputeBlockDestination(int blockX, int blockY, Footprint footprint, int width, int height)
+ {
+ int dstBaseX = blockX * footprint.Width;
+ int dstBaseY = blockY * footprint.Height;
+ int copyWidth = Math.Min(footprint.Width, width - dstBaseX); // right-edge blocks keep only the columns inside the image
+ int copyHeight = Math.Min(footprint.Height, height - dstBaseY); // bottom-edge blocks keep only the rows inside the image
+ bool isFullInterior = copyWidth == footprint.Width && copyHeight == footprint.Height;
+ return new BlockDestination(dstBaseX, dstBaseY, copyWidth, copyHeight, isFullInterior);
+ }
+
+ /// <summary>
+ /// Copies a decoded block from its scratch buffer into the image at the block's pixel
+ /// offset, row by row, clamped to the image bounds on right/bottom edges. The
+ /// channels-per-pixel factor is fixed at <see cref="BlockInfo.ChannelsPerPixel"/>
+ /// (RGBA) so the multiplies fold into constants at JIT time.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void CopyBlockRect(
+ ReadOnlySpan source,
+ Span destination,
+ int blockWidth,
+ int copyWidth,
+ int copyHeight,
+ int dstBaseX,
+ int dstBaseY,
+ int imageWidth)
+ {
+ int copyElements = copyWidth * BlockInfo.ChannelsPerPixel;
+ for (int pixelY = 0; pixelY < copyHeight; pixelY++)
+ {
+ int srcOffset = pixelY * blockWidth * BlockInfo.ChannelsPerPixel; // source rows use the full block width as stride
+ int dstOffset = (((dstBaseY + pixelY) * imageWidth) + dstBaseX) * BlockInfo.ChannelsPerPixel; // destination rows use the image width as stride
+ source.Slice(srcOffset, copyElements).CopyTo(destination.Slice(dstOffset, copyElements));
+ }
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BiseEncodingMode.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BiseEncodingMode.cs
new file mode 100644
index 00000000..028efe0c
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BiseEncodingMode.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding;
+
+/// <summary>
+/// The encoding modes supported by BISE.
+/// </summary>
+/// <remarks>
+/// Note that the trit and quint values correspond to the number of symbols in each alphabet (3 and 5); BitEncoding is 1, not 2.
+/// </remarks>
+internal enum BiseEncodingMode
+{
+ Unknown = 0,
+ BitEncoding = 1,
+ TritEncoding = 3,
+ QuintEncoding = 5,
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BitStream.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BitStream.cs
new file mode 100644
index 00000000..bee45b9f
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BitStream.cs
@@ -0,0 +1,168 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding;
+
+/// <summary>
+/// A simple bit stream used for reading/writing arbitrary-sized chunks.
+/// </summary>
+internal struct BitStream
+{
+ private ulong low;
+ private ulong high;
+ private uint dataSize; // number of valid bits in the 128-bit buffer
+
+ public BitStream(ulong data = 0, uint dataSize = 0) // seeds the stream from a 64-bit payload
+ {
+ this.low = data; // a 64-bit payload occupies the low half only
+ this.high = 0;
+ this.dataSize = dataSize; // stored as given; not checked against the 64 bits supplied
+ }
+
+ public BitStream(UInt128 data, uint dataSize) // seeds the stream from a full 128-bit payload
+ {
+ this.low = data.Low();
+ this.high = data.High();
+ this.dataSize = dataSize; // stored as given; not checked against the 128-bit capacity
+ }
+
+ public readonly uint Bits => this.dataSize; // number of valid bits currently held in the buffer
+
+ /// <summary>Appends the low <paramref name="size"/> bits of <paramref name="value"/> after the bits already buffered; throws if size is outside 0..64 or the 128-bit buffer would overflow.</summary>
+ public void PutBits(ulong value, int size)
+ {
+ if (size is < 0 or > 64)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size), "Size must be between 0 and 64 bits");
+ }
+
+ if (this.dataSize + (uint)size > 128)
+ {
+ throw new InvalidOperationException("Not enough space in BitStream");
+ }
+
+ ulong bits = value & MaskFor(size);
+ if (this.dataSize >= 64)
+ {
+ this.high |= bits << (int)(this.dataSize - 64);
+ }
+ else
+ {
+ // The left shift drops whatever does not fit in the low half; that spill-over goes to the bottom of the high half.
+ int lowFree = (int)(64 - this.dataSize);
+ this.low |= bits << (int)this.dataSize;
+ this.high |= size > lowFree ? bits >> lowFree : 0UL;
+ }
+
+ this.dataSize += (uint)size;
+ }
+
+ /// <summary>
+ /// Attempt to retrieve the specified number of bits from the buffer as a <see cref="UInt128"/>.
+ /// The buffer is shifted accordingly if successful.
+ /// </summary>
+ public bool TryGetBits(int count, out UInt128 bits)
+ {
+ UInt128? result = this.GetBitsUInt128(count);
+ bits = result ?? default; // zero when the request could not be satisfied
+ return result is not null;
+ }
+
+ /// <summary>
+ /// Attempts to read <paramref name="count"/> bits from the front of the buffer as a <see cref="ulong"/>, consuming them on success.
+ /// </summary>
+ /// <remarks>When <paramref name="count"/> exceeds 64, only the low 64 bits are returned although all requested bits are consumed.</remarks>
+ /// <returns><see langword="false"/>, leaving the buffer untouched, when <paramref name="count"/> is negative or exceeds the bits available.</returns>
+ public bool TryGetBits(int count, out ulong bits)
+ {
+ if (count < 0 || count > this.dataSize)
+ {
+ bits = 0;
+ return false;
+ }
+
+ bits = count <= 64 ? this.low & MaskFor(count) : this.low;
+ this.ShiftBuffer(count);
+ return true;
+ }
+
+ /// <summary>
+ /// Attempts to read <paramref name="count"/> bits from the front of the buffer as a <see cref="uint"/>, consuming them on success.
+ /// </summary>
+ /// <remarks>The value is truncated to its low 32 bits, so requests wider than 32 bits lose the upper bits even though they are consumed.</remarks>
+ /// <returns><see langword="false"/>, leaving the buffer untouched, when <paramref name="count"/> is negative or exceeds the bits available.</returns>
+ public bool TryGetBits(int count, out uint bits)
+ {
+ if (count < 0 || count > this.dataSize)
+ {
+ bits = 0;
+ return false;
+ }
+
+ bits = (uint)(count <= 64 ? this.low & MaskFor(count) : this.low);
+ this.ShiftBuffer(count);
+ return true;
+ }
+
+ private static ulong MaskFor(int bits) // mask with the low `bits` bits set
+ => bits == 64
+ ? ~0UL // full-width mask: C# masks the shift count, so `1UL << 64` would not produce zero
+ : ((1UL << bits) - 1UL);
+
+ /// <summary>
+ /// Reads <paramref name="count"/> bits from the front of the buffer as a <see cref="UInt128"/> and consumes them.
+ /// </summary>
+ /// <returns>The bits read, or <see langword="null"/> (buffer untouched) when <paramref name="count"/> is negative or exceeds the bits available.</returns>
+ /// <remarks>Requests of up to 64 bits come from the low half only; wider requests take the whole low half plus the masked bottom of the high half.</remarks>
+ private UInt128? GetBitsUInt128(int count)
+ {
+ if (count < 0 || count > this.dataSize)
+ {
+ return null;
+ }
+
+ // MaskFor(0) is zero and MaskFor(64) is all ones, so these two arms cover every count from 0 to 128.
+ UInt128 result = count <= 64
+ ? (UInt128)(this.low & MaskFor(count))
+ : new UInt128(this.high & MaskFor(count - 64), this.low);
+
+ this.ShiftBuffer(count);
+
+ return result;
+ }
+
+ private void ShiftBuffer(int count) // drops `count` bits from the front of the 128-bit buffer; callers check availability first
+ {
+ // C# masks shift amounts to the width of the operand, so `ulong << 64` and `ulong >> 64`
+ // are identity, not zero. Counts of 0, exactly 64 and 128 or more are therefore handled
+ // explicitly so that no branch ever shifts a 64-bit half by 64.
+ if (count == 0)
+ {
+ // Reading zero bits is a no-op.
+ }
+ else if (count < 64)
+ {
+ this.low = (this.low >> count) | (this.high << (64 - count)); // pull the bottom of the high half into the vacated top of the low half
+ this.high >>= count;
+ }
+ else if (count == 64)
+ {
+ this.low = this.high;
+ this.high = 0;
+ }
+ else if (count < 128)
+ {
+ this.low = this.high >> (count - 64); // the low half is consumed entirely; only the remainder of the high half survives
+ this.high = 0;
+ }
+ else
+ {
+ this.low = 0;
+ this.high = 0;
+ }
+
+ this.dataSize -= (uint)count;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BoundedIntegerSequenceCodec.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BoundedIntegerSequenceCodec.cs
new file mode 100644
index 00000000..b7623a11
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BoundedIntegerSequenceCodec.cs
@@ -0,0 +1,231 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding;
+
+/// <summary>
+/// <para>
+/// The Bounded Integer Sequence Encoding (BISE) allows storage of character sequences using
+/// arbitrary alphabets of up to 256 symbols. Each alphabet size is encoded in the most
+/// space-efficient choice of bits, trits, and quints (ASTC spec §C.2.12).
+/// </para>
+/// <para>
+/// The resulting bit pattern is a sequence of encoded blocks. All blocks in a sequence are
+/// one of the following encodings:
+/// </para>
+/// <list type="bullet">
+/// <item><description>Bit encoding: one encoded value of the form 2^k</description></item>
+/// <item><description>Trit encoding: five encoded values of the form 3*2^k</description></item>
+/// <item><description>Quint encoding: three encoded values of the form 5*2^k</description></item>
+/// </list>
+/// <para>
+/// The layouts of each block are designed such that the blocks can be truncated during
+/// encoding in order to support variable length input sequences (i.e. a sequence of values
+/// that are encoded using trit encoded blocks does not need to have a multiple-of-five
+/// length).
+/// </para>
+/// </summary>
+internal static class BoundedIntegerSequenceCodec
+{
+ /// <summary>
+ /// The maximum number of bits needed to encode an ISE value.
+ /// </summary>
+ /// <remarks>
+ /// The ASTC specification does not give a maximum number, however unquantized color
+ /// values have a maximum range of 255, meaning that we can't feasibly have more
+ /// than eight bits per value.
+ /// </remarks>
+ private const int Log2MaxRangeForBits = 8;
+
+ /// <summary>
+ /// Flat trit encodings for BISE blocks (256 rows × 5 trits, row-major).
+ /// </summary>
+ /// <remarks>
+ /// Used to decode blocks of values encoded using the ASTC integer sequence encoding.
+ /// Five trits (values that can take any number in the range [0, 2]) can take on a
+ /// total of 3^5 = 243 total values, which can be stored in eight bits. These eight
+ /// bits are used to decode the five trits based on the ASTC specification §C.2.12.
+ /// For simplicity, we store a look-up table here so that we don't need to implement
+ /// the decoding logic. Similarly, seven bits are used to decode three quints.
+ /// </remarks>
+ internal static readonly int[] FlatTritEncodings =
+ [
+ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 0, 2, 0, 0, 0, 2, 0, 0, 0,
+ 1, 2, 0, 0, 0, 2, 2, 0, 0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
+ 2, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1, 1, 2, 0, 0, 0, 2, 1, 0, 0, 1, 2, 1, 0, 0, 2, 2, 1, 0, 0,
+ 2, 1, 2, 0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 0, 2, 1, 0,
+ 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 2, 0, 1, 0, 1, 2, 0, 1, 0, 2, 2, 0, 1, 0, 2, 0, 2, 1, 0, 0, 2, 2, 1, 0,
+ 1, 2, 2, 1, 0, 2, 2, 2, 1, 0, 2, 0, 2, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 2, 0, 1, 1, 0, 0, 1, 2, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
+ 2, 1, 1, 1, 0, 1, 1, 2, 1, 0, 0, 2, 1, 1, 0, 1, 2, 1, 1, 0, 2, 2, 1, 1, 0, 2, 1, 2, 1, 0, 0, 1, 0, 2, 2, 1, 1, 0, 2, 2, 2, 1, 0, 2, 2,
+ 1, 0, 2, 2, 2, 0, 0, 0, 2, 0, 1, 0, 0, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 1, 0, 2, 0, 1, 1, 0, 2, 0, 2, 1, 0, 2, 0, 1, 0, 2, 2, 0,
+ 0, 2, 0, 2, 0, 1, 2, 0, 2, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 0, 1, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 0, 0, 1, 2, 0,
+ 1, 0, 1, 2, 0, 2, 0, 1, 2, 0, 0, 1, 2, 2, 0, 0, 1, 1, 2, 0, 1, 1, 1, 2, 0, 2, 1, 1, 2, 0, 1, 1, 2, 2, 0, 0, 2, 1, 2, 0, 1, 2, 1, 2, 0,
+ 2, 2, 1, 2, 0, 2, 1, 2, 2, 0, 0, 2, 0, 2, 2, 1, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 0, 0, 0, 2,
+ 0, 0, 2, 0, 2, 0, 1, 0, 0, 2, 1, 1, 0, 0, 2, 2, 1, 0, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 0, 2, 1, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2,
+ 0, 2, 2, 0, 2, 1, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 0, 0, 1, 0, 2, 1, 0, 1, 0, 2, 2, 0, 1, 0, 2, 0, 1, 2, 0, 2, 0, 1, 1, 0, 2,
+ 1, 1, 1, 0, 2, 2, 1, 1, 0, 2, 1, 1, 2, 0, 2, 0, 2, 1, 0, 2, 1, 2, 1, 0, 2, 2, 2, 1, 0, 2, 2, 1, 2, 0, 2, 0, 2, 2, 2, 2, 1, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 2, 1, 0, 0, 1,
+ 1, 0, 2, 0, 1, 0, 2, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0, 1, 2, 0, 2, 0, 1,
+ 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 1, 0, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 1,
+ 1, 2, 1, 0, 1, 2, 2, 1, 0, 1, 2, 1, 2, 0, 1, 0, 0, 1, 2, 2, 1, 0, 1, 2, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
+ 2, 0, 0, 1, 1, 0, 0, 2, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 2, 1, 0, 1, 1, 1, 0, 2, 1, 1, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 1,
+ 2, 0, 2, 1, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 0, 2, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 0, 1, 1, 2, 2,
+ 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 0, 0, 0, 2, 1, 1, 0, 0, 2, 1, 2, 0, 0, 2, 1, 0, 0, 2, 2, 1, 0, 1, 0, 2, 1, 1, 1, 0, 2, 1,
+ 2, 1, 0, 2, 1, 1, 0, 2, 2, 1, 0, 2, 0, 2, 1, 1, 2, 0, 2, 1, 2, 2, 0, 2, 1, 2, 0, 2, 2, 1, 0, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1,
+ 2, 0, 2, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 2, 1, 2, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1,
+ 0, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 1, 2,
+ 1, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 1, 2, 0, 1, 0, 1, 2, 1, 1, 0, 1, 2, 2, 1, 0, 1, 2, 1, 0, 2, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 2,
+ 2, 2, 0, 1, 2, 2, 0, 2, 1, 2, 0, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 0, 2, 1, 2, 0, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 0, 1, 1, 2,
+ 0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 0, 2, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2,
+ 0, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2
+ ];
+
+ /// <summary>
+ /// Flat quint encodings for BISE blocks (128 rows × 3 quints, row-major).
+ /// </summary>
+ /// <remarks>
+ /// See <see cref="FlatTritEncodings"/> for more details.
+ /// </remarks>
+ internal static readonly int[] FlatQuintEncodings =
+ [
+ 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, 0, 4, 0, 4, 4, 0, 4, 4, 4, 0, 1, 0, 1, 1, 0, 2, 1, 0, 3, 1, 0, 4, 1, 0,
+ 1, 4, 0, 4, 4, 1, 4, 4, 4, 0, 2, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 4, 2, 0, 2, 4, 0, 4, 4, 2, 4, 4, 4, 0, 3, 0, 1, 3, 0,
+ 2, 3, 0, 3, 3, 0, 4, 3, 0, 3, 4, 0, 4, 4, 3, 4, 4, 4, 0, 0, 1, 1, 0, 1, 2, 0, 1, 3, 0, 1, 4, 0, 1, 0, 4, 1, 4, 0, 4,
+ 0, 4, 4, 0, 1, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 4, 1, 1, 1, 4, 1, 4, 1, 4, 1, 4, 4, 0, 2, 1, 1, 2, 1, 2, 2, 1, 3, 2, 1,
+ 4, 2, 1, 2, 4, 1, 4, 2, 4, 2, 4, 4, 0, 3, 1, 1, 3, 1, 2, 3, 1, 3, 3, 1, 4, 3, 1, 3, 4, 1, 4, 3, 4, 3, 4, 4, 0, 0, 2,
+ 1, 0, 2, 2, 0, 2, 3, 0, 2, 4, 0, 2, 0, 4, 2, 2, 0, 4, 3, 0, 4, 0, 1, 2, 1, 1, 2, 2, 1, 2, 3, 1, 2, 4, 1, 2, 1, 4, 2,
+ 2, 1, 4, 3, 1, 4, 0, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 4, 2, 2, 2, 4, 3, 2, 4, 0, 3, 2, 1, 3, 2, 2, 3, 2,
+ 3, 3, 2, 4, 3, 2, 3, 4, 2, 2, 3, 4, 3, 3, 4, 0, 0, 3, 1, 0, 3, 2, 0, 3, 3, 0, 3, 4, 0, 3, 0, 4, 3, 0, 0, 4, 1, 0, 4,
+ 0, 1, 3, 1, 1, 3, 2, 1, 3, 3, 1, 3, 4, 1, 3, 1, 4, 3, 0, 1, 4, 1, 1, 4, 0, 2, 3, 1, 2, 3, 2, 2, 3, 3, 2, 3, 4, 2, 3,
+ 2, 4, 3, 0, 2, 4, 1, 2, 4, 0, 3, 3, 1, 3, 3, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 4, 3, 0, 3, 4, 1, 3, 4
+ ];
+
+ /// <summary>
+ /// The maximum ranges for BISE encoding.
+ /// </summary>
+ /// <remarks>
+ /// These are the numbers between 1 and 255 (<c>2^Log2MaxRangeForBits - 1</c>)
+ /// that can be represented exactly as a number in the ranges
+ /// [0, 2^k), [0, 3 * 2^k), and [0, 5 * 2^k).
+ /// </remarks>
+ internal static readonly int[] MaxRanges = [1, 2, 3, 4, 5, 7, 9, 11, 15, 19, 23, 31, 39, 47, 63, 79, 95, 127, 159, 191, 255];
+
+ // Encoding modes tried in descending alphabet size when picking the most space-efficient
+ // BISE packing for a given range (see InitPackingModeCache).
+ private static readonly BiseEncodingMode[] EncodingModesDescending =
+ [
+ BiseEncodingMode.QuintEncoding,
+ BiseEncodingMode.TritEncoding,
+ BiseEncodingMode.BitEncoding,
+ ];
+
+ private static readonly (BiseEncodingMode Mode, int BitCount)[] PackingModeCache = InitPackingModeCache(); // keep below MaxRanges and EncodingModesDescending: static initializers run in textual order
+
+ /// <summary>
+ /// Looks up the BISE encoding mode and the number of plain mantissa bits per value used
+ /// for values in the range [0, <paramref name="range"/>]; <paramref name="range"/> must be in [1, 255].
+ /// </summary>
+ public static (BiseEncodingMode Mode, int BitCount) GetPackingModeBitCount(int range)
+ {
+ Guard.MustBeGreaterThan(range, 0, nameof(range));
+ Guard.MustBeLessThan(range, 1 << Log2MaxRangeForBits, nameof(range));
+
+ return PackingModeCache[range];
+ }
+
+ /// <summary>
+ /// Unchecked variant of <see cref="GetPackingModeBitCount"/> for hot-path use where
+ /// <paramref name="range"/> is known to be in [1, 255] (the ASTC spec-valid range).
+ /// Skips argument validation — about two branches per call, which add up on the ~500K
+ /// BISE-decode calls a typical image requires.
+ /// </summary>
+ internal static (BiseEncodingMode Mode, int BitCount) GetPackingModeBitCountUnchecked(int range)
+ => PackingModeCache[range]; // slot 0 is never populated and holds the default tuple
+
+ /// <summary>
+ /// Total encoded size in bits of <paramref name="valuesCount"/> values: mantissa bits plus packed trit/quint bits.
+ /// </summary>
+ public static int GetBitCount(BiseEncodingMode encodingMode, int valuesCount, int bitCount)
+ {
+ // Five trits share 8 packed bits and three quints share 7; a partial block rounds up.
+ int mantissaBits = valuesCount * bitCount;
+ int packedBits = encodingMode switch
+ {
+ BiseEncodingMode.BitEncoding => 0,
+ BiseEncodingMode.QuintEncoding => ((valuesCount * 7) + 2) / 3,
+ BiseEncodingMode.TritEncoding => ((valuesCount * 8) + 4) / 5,
+ _ => throw new ArgumentOutOfRangeException(nameof(encodingMode), "Invalid encoding mode"),
+ };
+ return mantissaBits + packedBits;
+ }
+
+ /// <summary>
+ /// The number of bits needed to encode a given number of values within the range [0, <paramref name="range"/>] (inclusive).
+ /// </summary>
+ public static int GetBitCountForRange(int valuesCount, int range)
+ {
+ (BiseEncodingMode mode, int bitCount) = GetPackingModeBitCount(range);
+
+ return GetBitCount(mode, valuesCount, bitCount);
+ }
+
+ /// <summary>
+ /// The size in bits of one complete ISE block for the given mode and per-value mantissa bit count.
+ /// </summary>
+ public static int GetEncodedBlockSize(BiseEncodingMode mode, int bitCount)
+ {
+ // A block is its packed trit/quint bits plus one mantissa per contained value;
+ // any other mode yields a size of zero.
+ return mode switch
+ {
+ BiseEncodingMode.BitEncoding => bitCount,
+ BiseEncodingMode.QuintEncoding => 7 + (3 * bitCount),
+ BiseEncodingMode.TritEncoding => 8 + (5 * bitCount),
+ _ => 0,
+ };
+ }
+
+ private static (BiseEncodingMode, int)[] InitPackingModeCache()
+ {
+ int slotCount = 1 << Log2MaxRangeForBits;
+ (BiseEncodingMode, int)[] cache = new (BiseEncodingMode, int)[slotCount];
+
+ // Slot 0 keeps its default value; only ranges [1, 255] are populated.
+ for (int range = 1; range < slotCount; range++)
+ {
+ // Round the range up to the nearest entry of MaxRanges (falling back to the last one).
+ int ceiling = MaxRanges[^1];
+ foreach (int candidate in MaxRanges)
+ {
+ if (candidate >= range)
+ {
+ ceiling = candidate;
+ break;
+ }
+ }
+
+ // ceiling + 1 must factor as 5 * 2^k, 3 * 2^k, or 2^k; try the largest radix first.
+ int alphabetSize = ceiling + 1;
+ BiseEncodingMode chosen = BiseEncodingMode.Unknown;
+ foreach (BiseEncodingMode candidateMode in EncodingModesDescending)
+ {
+ int radix = (int)candidateMode;
+ if (alphabetSize % radix == 0 && int.IsPow2(alphabetSize / radix))
+ {
+ chosen = candidateMode;
+ break;
+ }
+ }
+
+ if (chosen == BiseEncodingMode.Unknown)
+ {
+ throw new InvalidOperationException($"Invalid range for BISE encoding: {range}");
+ }
+
+ cache[range] = (chosen, int.Log2(alphabetSize / (int)chosen));
+ }
+
+ return cache;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BoundedIntegerSequenceDecoder.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BoundedIntegerSequenceDecoder.cs
new file mode 100644
index 00000000..04316360
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/BoundedIntegerSequenceDecoder.cs
@@ -0,0 +1,145 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding;
+
+/// <summary>
+/// BISE decoder (ASTC spec §C.2.12) for bounded integer sequences. Stateless: callers pass
+/// the BISE encoding mode and mantissa bit count directly (both typically already on hand
+/// from <see cref="BoundedIntegerSequenceCodec.GetPackingModeBitCount"/>).
+/// </summary>
+internal static class BoundedIntegerSequenceDecoder
+{
+ /// <summary>
+ /// Decodes a sequence of bounded integers into a caller-provided span.
+ /// </summary>
+ /// <param name="encoding">The BISE encoding mode (bits, trits, or quints).</param>
+ /// <param name="bitCount">The number of mantissa bits per value (from the BISE packing).</param>
+ /// <param name="valuesCount">The number of values to decode.</param>
+ /// <param name="bitSource">The source of values to decode from.</param>
+ /// <param name="result">The span to write decoded values into.</param>
+ /// <exception cref="ArgumentOutOfRangeException">Thrown when the encoded block size is too large.</exception>
+ /// <exception cref="InvalidOperationException">Thrown when there are not enough bits to decode.</exception>
+ public static void Decode(BiseEncodingMode encoding, int bitCount, int valuesCount, ref BitStream bitSource, Span<int> result)
+ {
+ int totalBitCount = BoundedIntegerSequenceCodec.GetBitCount(encoding, valuesCount, bitCount);
+ int bitsPerBlock = BoundedIntegerSequenceCodec.GetEncodedBlockSize(encoding, bitCount);
+ Guard.MustBeLessThan(bitsPerBlock, 64, nameof(bitsPerBlock));
+
+ // Fixed 5 ints (20 bytes) — one BISE block holds at most 5 trits or 3 quints (spec §C.2.12).
+ Span<int> blockResult = stackalloc int[5];
+ int resultIndex = 0;
+ int bitsRemaining = totalBitCount;
+
+ while (bitsRemaining > 0)
+ {
+ int bitsToRead = Math.Min(bitsRemaining, bitsPerBlock); // the final block may be truncated
+ if (!bitSource.TryGetBits(bitsToRead, out ulong blockBits))
+ {
+ throw new InvalidOperationException("Not enough bits in BitStream to decode BISE block");
+ }
+
+ if (encoding == BiseEncodingMode.BitEncoding)
+ {
+ if (resultIndex < valuesCount)
+ {
+ result[resultIndex++] = (int)blockBits;
+ }
+ }
+ else
+ {
+ int decoded = DecodeISEBlock(encoding, blockBits, bitCount, blockResult);
+ for (int i = 0; i < decoded && resultIndex < valuesCount; ++i)
+ {
+ result[resultIndex++] = blockResult[i];
+ }
+ }
+
+ bitsRemaining -= bitsPerBlock; // a truncated final block drives this to <= 0 and ends the loop
+ }
+
+ if (resultIndex < valuesCount)
+ {
+ throw new InvalidOperationException("Decoded fewer values than expected from BISE block");
+ }
+ }
+
+ /// <summary>
+ /// Decodes one trit/quint BISE block (ASTC spec §C.2.12) into <paramref name="result"/>.
+ /// Returns the number of values written (5 for trits, 3 for quints). Uses direct bit
+ /// extraction (no BitStream) and flat encoding tables for speed.
+ /// </summary>
+ private static int DecodeISEBlock(BiseEncodingMode mode, ulong encodedBlock, int encodedBitCount, Span<int> result)
+ {
+ ulong mantissaMask = (1UL << encodedBitCount) - 1;
+ return mode == BiseEncodingMode.TritEncoding
+ ? DecodeTritBlock(encodedBlock, encodedBitCount, mantissaMask, result)
+ : DecodeQuintBlock(encodedBlock, encodedBitCount, mantissaMask, result); // every non-trit mode is treated as quints
+ }
+
+ /// <summary>
+ /// Decodes a five-value trit block. The ASTC spec §C.2.12 layout interleaves mantissas
+ /// and an 8-bit packed trit selector as [m0, t0(2), m1, t1(2), m2, t2(1), m3, t3(2), m4, t4(1)].
+ /// The 8 selector bits look up a row in the pre-flattened trit encoding table.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int DecodeTritBlock(ulong encodedBlock, int encodedBitCount, ulong mantissaMask, Span<int> result)
+ {
+ int bitPosition = 0;
+ int mantissa0 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ bitPosition += encodedBitCount;
+ ulong encodedTrits = (encodedBlock >> bitPosition) & 0x3;
+ bitPosition += 2;
+ int mantissa1 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ bitPosition += encodedBitCount;
+ encodedTrits |= ((encodedBlock >> bitPosition) & 0x3) << 2;
+ bitPosition += 2;
+ int mantissa2 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ bitPosition += encodedBitCount;
+ encodedTrits |= ((encodedBlock >> bitPosition) & 0x1) << 4;
+ bitPosition += 1;
+ int mantissa3 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ bitPosition += encodedBitCount;
+ encodedTrits |= ((encodedBlock >> bitPosition) & 0x3) << 5;
+ bitPosition += 2;
+ int mantissa4 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ encodedTrits |= ((encodedBlock >> (bitPosition + encodedBitCount)) & 0x1) << 7;
+
+ int tritTableBase = (int)encodedTrits * 5; // row-major table: 5 trits per selector value
+ result[0] = (BoundedIntegerSequenceCodec.FlatTritEncodings[tritTableBase] << encodedBitCount) | mantissa0;
+ result[1] = (BoundedIntegerSequenceCodec.FlatTritEncodings[tritTableBase + 1] << encodedBitCount) | mantissa1;
+ result[2] = (BoundedIntegerSequenceCodec.FlatTritEncodings[tritTableBase + 2] << encodedBitCount) | mantissa2;
+ result[3] = (BoundedIntegerSequenceCodec.FlatTritEncodings[tritTableBase + 3] << encodedBitCount) | mantissa3;
+ result[4] = (BoundedIntegerSequenceCodec.FlatTritEncodings[tritTableBase + 4] << encodedBitCount) | mantissa4;
+ return 5;
+ }
+
+ /// <summary>
+ /// Decodes a three-value quint block (ASTC spec §C.2.12). The 7-bit packed quint
+ /// selector is interleaved as [m0, q0(3), m1, q1(2), m2, q2(2)] and indexes a row in
+ /// the pre-flattened quint encoding table.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int DecodeQuintBlock(ulong encodedBlock, int encodedBitCount, ulong mantissaMask, Span<int> result)
+ {
+ int bitPosition = 0;
+ int mantissa0 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ bitPosition += encodedBitCount;
+ ulong encodedQuints = (encodedBlock >> bitPosition) & 0x7;
+ bitPosition += 3;
+ int mantissa1 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ bitPosition += encodedBitCount;
+ encodedQuints |= ((encodedBlock >> bitPosition) & 0x3) << 3;
+ bitPosition += 2;
+ int mantissa2 = (int)((encodedBlock >> bitPosition) & mantissaMask);
+ encodedQuints |= ((encodedBlock >> (bitPosition + encodedBitCount)) & 0x3) << 5;
+
+ int quintTableBase = (int)encodedQuints * 3; // row-major table: 3 quints per selector value
+ result[0] = (BoundedIntegerSequenceCodec.FlatQuintEncodings[quintTableBase] << encodedBitCount) | mantissa0;
+ result[1] = (BoundedIntegerSequenceCodec.FlatQuintEncodings[quintTableBase + 1] << encodedBitCount) | mantissa1;
+ result[2] = (BoundedIntegerSequenceCodec.FlatQuintEncodings[quintTableBase + 2] << encodedBitCount) | mantissa2;
+ return 3;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/BitQuantizationMap.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/BitQuantizationMap.cs
new file mode 100644
index 00000000..5b9ecb71
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/BitQuantizationMap.cs
@@ -0,0 +1,75 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+
+/// <summary>
+/// Builds <see cref="QuantizationMap"/> instances for the pure-bit BISE encoding mode
+/// (no trits/quints). Bit-replicates each quantized value up to <c>totalUnquantizedBits</c>
+/// width to derive its unquantized form. Used for both endpoint colour unquantization
+/// (ASTC spec §C.2.13) and weight unquantization (§C.2.17).
+/// </summary>
+internal static class BitQuantizationMap
+{
+ /// <summary>Builds the quantize/unquantize tables for a bit-only range by bit replication.</summary>
+ /// <param name="range">Inclusive upper bound of the quantized slot index; positive, and <c>range + 1</c> must be a power of two.</param>
+ /// <param name="totalUnquantizedBits">Bit width of the unquantized output: 8 for endpoint
+ /// values, 6 for weights.</param>
+ public static QuantizationMap Create(int range, int totalUnquantizedBits)
+ {
+ Guard.IsTrue(range > 0 && CountOnes(range + 1) == 1, nameof(range), "range must be positive and range + 1 must be a power of two.");
+
+ int bitCount = QuantizationMap.Log2Floor(range + 1); // >= 1 here; a width of 0 would stall the replication loop below
+ List<int> unquantization = [];
+ List<int> quantization = [];
+
+ for (int bits = 0; bits <= range; bits++)
+ {
+ int unquantized = bits;
+ int unquantizedBitCount = bitCount;
+ while (unquantizedBitCount < totalUnquantizedBits)
+ {
+ int destinationShiftUp = Math.Min(bitCount, totalUnquantizedBits - unquantizedBitCount);
+ int sourceShiftDown = bitCount - destinationShiftUp; // the last copy may take only the top bits of the pattern
+ unquantized <<= destinationShiftUp;
+ unquantized |= bits >> sourceShiftDown;
+ unquantizedBitCount += destinationShiftUp;
+ }
+
+ if (unquantizedBitCount != totalUnquantizedBits)
+ {
+ throw new InvalidOperationException("The quantized bit width exceeds totalUnquantizedBits.");
+ }
+
+ unquantization.Add(unquantized);
+
+ if (bits > 0)
+ {
+ int previousUnquantized = unquantization[bits - 1];
+ while (quantization.Count <= (previousUnquantized + unquantized) / 2)
+ {
+ quantization.Add(bits - 1); // inputs up to the midpoint map to the previous slot
+ }
+ }
+
+ while (quantization.Count <= unquantized)
+ {
+ quantization.Add(bits);
+ }
+ }
+
+ return new QuantizationMap([.. quantization], [.. unquantization]);
+ }
+
+ /// <summary>Counts the set bits of <paramref name="value"/>; the shift is logical so negative inputs terminate.</summary>
+ private static int CountOnes(int value)
+ {
+ int count = 0;
+ for (; value != 0; value >>>= 1)
+ {
+ count += value & 1;
+ }
+
+ return count;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/Quantization.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/Quantization.cs
new file mode 100644
index 00000000..ce56a22a
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/Quantization.cs
@@ -0,0 +1,235 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+
+internal static class Quantization
+{
+ public const int EndpointRangeMinValue = 5;
+ public const int WeightRangeMaxValue = 31;
+
+ private static readonly SortedDictionary<int, QuantizationMap> EndpointMaps = InitEndpointMaps();
+ private static readonly SortedDictionary<int, QuantizationMap> WeightMaps = InitWeightMaps();
+
+ // Flat lookup tables indexed by range value for O(1) access.
+ // Each slot maps to the QuantizationMap for the greatest supported range <= that index (null below the smallest).
+ private static readonly QuantizationMap?[] EndpointMapByRange = BuildFlatLookup(EndpointMaps, 256);
+ private static readonly QuantizationMap?[] WeightMapByRange = BuildFlatLookup(WeightMaps, 32);
+
+ // Pre-computed flat tables for weight unquantization: entry[quantizedValue] = final unquantized weight.
+ // Includes the dq > 32 -> dq + 1 adjustment. Indexed by weight range.
+ // Valid ranges: 1, 2, 3, 4, 5, 7, 9, 11, 15, 19, 23, 31
+ private static readonly int[]?[] UnquantizeWeightsFlat = InitializeUnquantizeWeightsFlat();
+
+ // Pre-computed flat tables for endpoint unquantization.
+ // Indexed by range value. Valid ranges: 5, 7, 9, 11, 15, 19, 23, 31, 39, 47, 63, 79, 95, 127, 159, 191, 255
+ private static readonly int[]?[] UnquantizeEndpointsFlat = InitializeUnquantizeEndpointsFlat();
+
+ /// <summary>Quantizes an endpoint value in [0, 255] to a slot of the endpoint range <paramref name="rangeMaxValue"/>.</summary>
+ public static int QuantizeCEValueToRange(int value, int rangeMaxValue)
+ {
+ Guard.MustBeBetweenOrEqualTo(rangeMaxValue, EndpointRangeMinValue, byte.MaxValue, nameof(rangeMaxValue));
+ Guard.MustBeBetweenOrEqualTo(value, 0, byte.MaxValue, nameof(value));
+ return GetQuantMapForValueRange(rangeMaxValue).Quantize(value);
+ }
+
+ /// <summary>Maps a quantized endpoint slot in [0, <paramref name="rangeMaxValue"/>] back to its unquantized value.</summary>
+ public static int UnquantizeCEValueFromRange(int value, int rangeMaxValue)
+ {
+ Guard.MustBeBetweenOrEqualTo(rangeMaxValue, EndpointRangeMinValue, byte.MaxValue, nameof(rangeMaxValue));
+ Guard.MustBeBetweenOrEqualTo(value, 0, rangeMaxValue, nameof(value));
+ return GetQuantMapForValueRange(rangeMaxValue).Unquantize(value);
+ }
+
+ public static int QuantizeWeightToRange(int weight, int rangeMaxValue)
+ {
+ Guard.MustBeBetweenOrEqualTo(rangeMaxValue, 1, WeightRangeMaxValue, nameof(rangeMaxValue));
+ Guard.MustBeBetweenOrEqualTo(weight, 0, 64, nameof(weight));
+
+ // ASTC spec §C.2.17: weight slot 33 is unused; collapse 34..64 to 33..63 before table lookup.
+ // The inverse (add 1 to values above 32) lives in UnquantizeWeightFromRange and UnquantizeWeightsFlat.
+ if (weight > 33)
+ {
+ weight -= 1;
+ }
+
+ return GetQuantMapForWeightRange(rangeMaxValue).Quantize(weight);
+ }
+
+ /// <summary>
+ /// Maps a quantized weight slot of the given weight range back to its unquantized value.
+ /// </summary>
+ public static int UnquantizeWeightFromRange(int weight, int rangeMaxValue)
+ {
+ Guard.MustBeBetweenOrEqualTo(rangeMaxValue, 1, WeightRangeMaxValue, nameof(rangeMaxValue));
+ Guard.MustBeBetweenOrEqualTo(weight, 0, rangeMaxValue, nameof(weight));
+
+ // Table values above 32 are shifted up by one, so this method never returns 33.
+ int tableValue = GetQuantMapForWeightRange(rangeMaxValue).Unquantize(weight);
+ return tableValue > 32 ? tableValue + 1 : tableValue;
+ }
+
+ /// <summary>
+ /// Batch unquantize: uses pre-computed flat table for O(1) lookup per value.
+ /// No per-call validation, no conditional branch per weight.
+ /// </summary>
+ /// <exception cref="ArgumentNullException">
+ /// Thrown when <paramref name="range"/> has no associated unquantization table — would
+ /// only happen on a malformed block that escaped the caller's
+ /// spec-bound checks.
+ /// </exception>
+ internal static void UnquantizeWeightsBatch(Span<int> weights, int range)
+ {
+ int[]? table = UnquantizeWeightsFlat[range];
+ Guard.NotNull(table, nameof(range));
+
+ for (int i = 0; i < weights.Length; i++)
+ {
+ weights[i] = table[weights[i]]; // in-place: each quantized slot is replaced by its table value
+ }
+ }
+
+ /// <summary>
+ /// Batch unquantize color endpoint values: uses pre-computed flat table.
+ /// No per-call validation, single array lookup per value.
+ /// </summary>
+ /// <exception cref="ArgumentNullException">
+ /// Thrown when <paramref name="rangeMaxValue"/> has no associated unquantization table —
+ /// would only happen on a malformed block that escaped the caller's
+ /// spec-bound checks.
+ /// </exception>
+ internal static void UnquantizeCEValuesBatch(Span<int> values, int rangeMaxValue)
+ {
+ int[]? table = UnquantizeEndpointsFlat[rangeMaxValue];
+ Guard.NotNull(table, nameof(rangeMaxValue));
+
+ for (int i = 0; i < values.Length; i++)
+ {
+ values[i] = table[values[i]]; // in-place: each quantized slot is replaced by its table value
+ }
+ }
+
+ private static SortedDictionary<int, QuantizationMap> InitEndpointMaps()
+ => new() // keyed by the inclusive range maximum; one map per supported endpoint range
+ {
+ { 5, TritQuantizationMap.Create(5, TritQuantizationMap.GetUnquantizedValue) },
+ { 7, BitQuantizationMap.Create(7, 8) },
+ { 9, QuintQuantizationMap.Create(9, QuintQuantizationMap.GetUnquantizedValue) },
+ { 11, TritQuantizationMap.Create(11, TritQuantizationMap.GetUnquantizedValue) },
+ { 15, BitQuantizationMap.Create(15, 8) },
+ { 19, QuintQuantizationMap.Create(19, QuintQuantizationMap.GetUnquantizedValue) },
+ { 23, TritQuantizationMap.Create(23, TritQuantizationMap.GetUnquantizedValue) },
+ { 31, BitQuantizationMap.Create(31, 8) },
+ { 39, QuintQuantizationMap.Create(39, QuintQuantizationMap.GetUnquantizedValue) },
+ { 47, TritQuantizationMap.Create(47, TritQuantizationMap.GetUnquantizedValue) },
+ { 63, BitQuantizationMap.Create(63, 8) },
+ { 79, QuintQuantizationMap.Create(79, QuintQuantizationMap.GetUnquantizedValue) },
+ { 95, TritQuantizationMap.Create(95, TritQuantizationMap.GetUnquantizedValue) },
+ { 127, BitQuantizationMap.Create(127, 8) },
+ { 159, QuintQuantizationMap.Create(159, QuintQuantizationMap.GetUnquantizedValue) },
+ { 191, TritQuantizationMap.Create(191, TritQuantizationMap.GetUnquantizedValue) },
+ { 255, BitQuantizationMap.Create(255, 8) },
+ };
+
+ private static SortedDictionary<int, QuantizationMap> InitWeightMaps()
+ => new() // keyed by the inclusive range maximum; one map per supported weight range
+ {
+ { 1, BitQuantizationMap.Create(1, 6) },
+ { 2, TritQuantizationMap.Create(2, TritQuantizationMap.GetUnquantizedWeight) },
+ { 3, BitQuantizationMap.Create(3, 6) },
+ { 4, QuintQuantizationMap.Create(4, QuintQuantizationMap.GetUnquantizedWeight) },
+ { 5, TritQuantizationMap.Create(5, TritQuantizationMap.GetUnquantizedWeight) },
+ { 7, BitQuantizationMap.Create(7, 6) },
+ { 9, QuintQuantizationMap.Create(9, QuintQuantizationMap.GetUnquantizedWeight) },
+ { 11, TritQuantizationMap.Create(11, TritQuantizationMap.GetUnquantizedWeight) },
+ { 15, BitQuantizationMap.Create(15, 6) },
+ { 19, QuintQuantizationMap.Create(19, QuintQuantizationMap.GetUnquantizedWeight) },
+ { 23, TritQuantizationMap.Create(23, TritQuantizationMap.GetUnquantizedWeight) },
+ { 31, BitQuantizationMap.Create(31, 6) },
+ };
+
+ private static QuantizationMap?[] BuildFlatLookup(SortedDictionary<int, QuantizationMap> maps, int size)
+ {
+ QuantizationMap?[] flat = new QuantizationMap?[size];
+ QuantizationMap? current = null; // stays null until the first key in maps is reached
+ for (int i = 0; i < size; i++)
+ {
+ if (maps.TryGetValue(i, out QuantizationMap? map))
+ {
+ current = map;
+ }
+
+ flat[i] = current; // carry the most recent map forward to indices without their own entry
+ }
+
+ return flat;
+ }
+
+ /// <summary>
+ /// Returns the endpoint <see cref="QuantizationMap"/> for the given range. Callers must
+ /// have already validated that <paramref name="r"/> is within
+ /// [<see cref="EndpointRangeMinValue"/>, byte.MaxValue]; the public methods on
+ /// <see cref="Quantization"/> do this. Throws if the slot has no associated map.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="r"/> is outside the valid endpoint range.
+ /// </exception>
+ private static QuantizationMap GetQuantMapForValueRange(int r)
+ => (uint)r < (uint)EndpointMapByRange.Length && EndpointMapByRange[r] is { } map
+ ? map
+ : throw new ArgumentOutOfRangeException(nameof(r), r, "No endpoint quantization map for this range");
+
+ /// <summary>
+ /// Returns the weight <see cref="QuantizationMap"/> for the given range. Callers must
+ /// have already validated that <paramref name="r"/> is within
+ /// [1, <see cref="WeightRangeMaxValue"/>]; the public methods on
+ /// <see cref="Quantization"/> do this. Throws if the slot has no associated map.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="r"/> is outside the valid weight range.
+ /// </exception>
+ private static QuantizationMap GetQuantMapForWeightRange(int r)
+ => (uint)r < (uint)WeightMapByRange.Length && WeightMapByRange[r] is { } map
+ ? map
+ : throw new ArgumentOutOfRangeException(nameof(r), r, "No weight quantization map for this range");
+
+ private static int[]?[] InitializeUnquantizeWeightsFlat()
+ {
+ int[]?[] tables = new int[]?[WeightRangeMaxValue + 1]; // slots for unsupported ranges stay null
+ foreach (KeyValuePair<int, QuantizationMap> kvp in WeightMaps)
+ {
+ int range = kvp.Key;
+ QuantizationMap map = kvp.Value;
+ int[] table = new int[range + 1];
+ for (int i = 0; i <= range; i++)
+ {
+ int dequantized = map.Unquantize(i);
+ table[i] = dequantized > 32 ? dequantized + 1 : dequantized; // same adjustment as UnquantizeWeightFromRange
+ }
+
+ tables[range] = table;
+ }
+
+ return tables;
+ }
+
+ private static int[]?[] InitializeUnquantizeEndpointsFlat()
+ {
+ int[]?[] tables = new int[]?[256]; // slots for unsupported ranges stay null
+ foreach (KeyValuePair<int, QuantizationMap> kvp in EndpointMaps)
+ {
+ int range = kvp.Key;
+ QuantizationMap map = kvp.Value;
+ int[] table = new int[range + 1];
+ for (int i = 0; i <= range; i++)
+ {
+ table[i] = map.Unquantize(i);
+ }
+
+ tables[range] = table;
+ }
+
+ return tables;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/QuantizationMap.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/QuantizationMap.cs
new file mode 100644
index 00000000..50a04607
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/QuantizationMap.cs
@@ -0,0 +1,78 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+
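+/// <summary>
+/// Pre-computed quantize/unquantize lookup tables for a single ASTC quantization range.
+/// Both arrays are constructed once and the instance is immutable thereafter, built via
+/// <see cref="BitQuantizationMap"/>, <see cref="TritQuantizationMap"/>, or <see cref="QuintQuantizationMap"/>.
+/// </summary>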
+internal sealed class QuantizationMap
+{
+ private readonly int[] quantizationMap;
+ private readonly int[] unquantizationMap;
+
+ /// <param name="quantizationMap">Length 256 (or shorter); maps an unquantized value to its
+ /// nearest quantized slot.</param>
+ /// <param name="unquantizationMap">Length <c>range + 1</c>; maps a quantized slot back to
+ /// its unquantized value.</param>
+ public QuantizationMap(int[] quantizationMap, int[] unquantizationMap)
+ {
+ this.quantizationMap = quantizationMap; // stored by reference; the array is not copied
+ this.unquantizationMap = unquantizationMap; // stored by reference; the array is not copied
+ }
+
+ public int Quantize(int x)
+ => (uint)x >= (uint)this.quantizationMap.Length
+ ? 0 // negative or past-the-end inputs fall back to 0
+ : this.quantizationMap[x];
+
+ public int Unquantize(int x)
+ => (uint)x >= (uint)this.unquantizationMap.Length
+ ? 0 // negative or past-the-end slots fall back to 0
+ : this.unquantizationMap[x];
+
+ /// <summary>Returns floor(log2(value)); values below 2 (including negatives) yield 0.</summary>
+ internal static int Log2Floor(int value)
+ {
+ // int.Log2 is used instead of probing with (1 << k) <= value: that probe wraps once k
+ // reaches 31, so such a loop cannot terminate for value >= 2^30. int.Log2 rejects
+ // negative input, so everything below 2 is mapped to 0 up front.
+ return value < 2
+ ? 0
+ : int.Log2(value);
+ }
+
+ /// <summary>
+ /// Derives a 256-entry quantize table from an existing unquantize table: for each
+ /// input value in [0, 255] it records the index in <paramref name="unquantized"/>
+ /// whose entry is nearest. Used by <see cref="TritQuantizationMap"/> and
+ /// <see cref="QuintQuantizationMap"/>; <see cref="BitQuantizationMap"/> builds its
+ /// quantize table inline because the structure of bit-replication makes the closest
+ /// match analytically derivable without a search.
+ /// </summary>
+ internal static int[] BuildQuantizationMapFromUnquantized(int[] unquantized)
+ {
+ int[] nearest = new int[256];
+
+ for (int target = 0; target < nearest.Length; target++)
+ {
+ // Strict '<' keeps the earliest slot when two candidates are equally close.
+ (int winner, int winnerDistance) = (0, int.MaxValue);
+ for (int slot = 0; slot < unquantized.Length; slot++)
+ {
+ int delta = target - unquantized[slot];
+ int distance = delta * delta;
+ if (distance < winnerDistance)
+ {
+ (winner, winnerDistance) = (slot, distance);
+ }
+ }
+
+ nearest[target] = winner;
+ }
+
+ return nearest;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/QuintQuantizationMap.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/QuintQuantizationMap.cs
new file mode 100644
index 00000000..5ce08b46
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/QuintQuantizationMap.cs
@@ -0,0 +1,76 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+
+/// <summary>
+/// Builds <see cref="QuantizationMap"/> instances for the quint BISE encoding mode plus the
+/// per-quint unquantization tables for endpoint colour values (ASTC spec §C.2.13) and
+/// weights (§C.2.17).
+/// </summary>
+internal static class QuintQuantizationMap
+{
+    /// <summary>
+    /// Creates the quantization map for a quint-encoded range by evaluating
+    /// <paramref name="unquantFunc"/> for every (quint, bits) combination, quint-major.
+    /// </summary>
+    /// <param name="range">Inclusive upper bound of the quantized slot index. <c>range + 1</c>
+    /// must be divisible by 5.</param>
+    /// <param name="unquantFunc">Per-quint unquantization function taking (quint, bits, range) — typically
+    /// <see cref="GetUnquantizedValue"/> or <see cref="GetUnquantizedWeight"/>.</param>
+    /// <returns>A map holding the generated unquantize table and the quantize table derived from it.</returns>
+    public static QuantizationMap Create(int range, Func<int, int, int, int> unquantFunc)
+    {
+        Guard.IsTrue((range + 1) % 5 == 0, nameof(range), "range + 1 must be a multiple of 5.");
+
+        // (range + 1) / 5 is the number of distinct bit patterns per quint; its log2 is the bit width.
+        int bitsPowerOfTwo = (range + 1) / 5;
+        int bitCount = bitsPowerOfTwo == 0 ? 0 : QuantizationMap.Log2Floor(bitsPowerOfTwo);
+
+        int[] unquantization = new int[5 * (1 << bitCount)];
+        int idx = 0;
+        for (int quint = 0; quint < 5; ++quint)
+        {
+            for (int bits = 0; bits < (1 << bitCount); ++bits)
+            {
+                unquantization[idx++] = unquantFunc(quint, bits, range);
+            }
+        }
+
+        int[] quantization = QuantizationMap.BuildQuantizationMapFromUnquantized(unquantization);
+        return new QuantizationMap(quantization, unquantization);
+    }
+
+    /// <summary>
+    /// Unquantizes one quint-encoded colour endpoint value.
+    /// </summary>
+    /// <param name="quint">The quint digit (0..4).</param>
+    /// <param name="bits">The plain bits that accompany the quint.</param>
+    /// <param name="range">The quantization range; one of 9, 19, 39, 79 or 159.</param>
+    /// <returns>The unquantized value.</returns>
+    /// <exception cref="ArgumentException"><paramref name="range"/> is not a supported quint range.</exception>
+    internal static int GetUnquantizedValue(int quint, int bits, int range)
+    {
+        // a: the lowest bit replicated across nine bits.
+        int a = (bits & 1) != 0 ? 0x1FF : 0;
+
+        // b: the remaining bits scattered into a range-specific pattern; c: the per-range multiplier.
+        // `is var x` always matches — it only names the masked bits inside the arm, so the
+        // `default` branch of each conditional is never taken.
+        (int b, int c) = range switch
+        {
+            9 => (0, 113),
+            19 => ((bits >> 1) & 0x1) is var x ? ((x << 2) | (x << 3) | (x << 8), 54) : default,
+            39 => ((bits >> 1) & 0x3) is var x ? ((x >> 1) | (x << 1) | (x << 7), 26) : default,
+            79 => ((bits >> 1) & 0x7) is var x ? ((x >> 1) | (x << 6), 13) : default,
+            159 => ((bits >> 1) & 0xF) is var x ? ((x >> 3) | (x << 5), 6) : default,
+            _ => throw new ArgumentException("Illegal quint encoding")
+        };
+        int t = (quint * c) + b;
+        t ^= a;
+        t = (a & 0x80) | (t >> 2);
+        return t;
+    }
+
+    /// <summary>
+    /// Unquantizes one quint-encoded weight.
+    /// </summary>
+    /// <param name="quint">The quint digit (0..4).</param>
+    /// <param name="bits">The plain bits that accompany the quint; ignored when <paramref name="range"/> is 4.</param>
+    /// <param name="range">The quantization range; one of 4, 9 or 19.</param>
+    /// <returns>The unquantized weight.</returns>
+    /// <exception cref="ArgumentException"><paramref name="range"/> is not a supported quint range.</exception>
+    internal static int GetUnquantizedWeight(int quint, int bits, int range)
+    {
+        if (range == 4)
+        {
+            // Pure-quint range: a fixed table. A span over constant data avoids allocating
+            // a fresh array on every call.
+            ReadOnlySpan<int> weights = [0, 16, 32, 47, 63];
+            return weights[quint];
+        }
+
+        // Same a/b/c scheme as the colour path, but seven bits wide.
+        int a = (bits & 1) != 0 ? 0x7F : 0;
+        (int b, int c) = range switch
+        {
+            9 => (0, 28),
+            19 => ((bits >> 1) & 0x1) is var x ? ((x << 1) | (x << 6), 13) : default,
+            _ => throw new ArgumentException("Illegal quint encoding")
+        };
+        int t = (quint * c) + b;
+        t ^= a;
+        t = (a & 0x20) | (t >> 2);
+        return t;
+    }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/TritQuantizationMap.cs b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/TritQuantizationMap.cs
new file mode 100644
index 00000000..d40ad9e2
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BiseEncoding/Quantize/TritQuantizationMap.cs
@@ -0,0 +1,85 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+
+/// <summary>
+/// Builds <see cref="QuantizationMap"/> instances for the trit BISE encoding mode plus the
+/// per-trit unquantization tables for endpoint colour values (ASTC spec §C.2.13) and
+/// weights (§C.2.17).
+/// </summary>
+internal static class TritQuantizationMap
+{
+    /// <summary>
+    /// Creates the quantization map for a trit-encoded range by evaluating
+    /// <paramref name="unquantFunc"/> for every (trit, bits) combination, trit-major.
+    /// </summary>
+    /// <param name="range">Inclusive upper bound of the quantized slot index. <c>range + 1</c>
+    /// must be divisible by 3.</param>
+    /// <param name="unquantFunc">Per-trit unquantization function taking (trit, bits, range) — typically
+    /// <see cref="GetUnquantizedValue"/> or <see cref="GetUnquantizedWeight"/>.</param>
+    /// <returns>A map holding the generated unquantize table and the quantize table derived from it.</returns>
+    public static QuantizationMap Create(int range, Func<int, int, int, int> unquantFunc)
+    {
+        Guard.IsTrue((range + 1) % 3 == 0, nameof(range), "range + 1 must be a multiple of 3.");
+
+        // (range + 1) / 3 is the number of distinct bit patterns per trit; its log2 is the bit width.
+        int bitsPowerOfTwo = (range + 1) / 3;
+        int bitCount = bitsPowerOfTwo == 0 ? 0 : QuantizationMap.Log2Floor(bitsPowerOfTwo);
+
+        int[] unquantization = new int[3 * (1 << bitCount)];
+        int idx = 0;
+        for (int trit = 0; trit < 3; ++trit)
+        {
+            for (int bits = 0; bits < (1 << bitCount); ++bits)
+            {
+                unquantization[idx++] = unquantFunc(trit, bits, range);
+            }
+        }
+
+        int[] quantization = QuantizationMap.BuildQuantizationMapFromUnquantized(unquantization);
+        return new QuantizationMap(quantization, unquantization);
+    }
+
+    /// <summary>
+    /// Unquantizes one trit-encoded colour endpoint value.
+    /// </summary>
+    /// <param name="trit">The trit digit (0..2).</param>
+    /// <param name="bits">The plain bits that accompany the trit.</param>
+    /// <param name="range">The quantization range; one of 5, 11, 23, 47, 95 or 191.</param>
+    /// <returns>The unquantized value.</returns>
+    /// <exception cref="ArgumentException"><paramref name="range"/> is not a supported trit range.</exception>
+    internal static int GetUnquantizedValue(int trit, int bits, int range)
+    {
+        // a: the lowest bit replicated across nine bits.
+        int a = (bits & 1) != 0 ? 0x1FF : 0;
+
+        // b: the remaining bits scattered into a range-specific pattern; c: the per-range multiplier.
+        // `is var x` always matches — it only names the masked bits inside the arm, so the
+        // `default` branch of each conditional is never taken.
+        (int b, int c) = range switch
+        {
+            5 => (0, 204),
+            11 => ((bits >> 1) & 0x1) is var x ? ((x << 1) | (x << 2) | (x << 4) | (x << 8), 93) : default,
+            23 => ((bits >> 1) & 0x3) is var x ? (x | (x << 2) | (x << 7), 44) : default,
+            47 => ((bits >> 1) & 0x7) is var x ? (x | (x << 6), 22) : default,
+            95 => ((bits >> 1) & 0xF) is var x ? ((x >> 2) | (x << 5), 11) : default,
+            191 => ((bits >> 1) & 0x1F) is var x ? ((x >> 4) | (x << 4), 5) : default,
+            _ => throw new ArgumentException("Illegal trit encoding")
+        };
+        int t = (trit * c) + b;
+        t ^= a;
+        t = (a & 0x80) | (t >> 2);
+        return t;
+    }
+
+    /// <summary>
+    /// Unquantizes one trit-encoded weight.
+    /// </summary>
+    /// <param name="trit">The trit digit (0..2).</param>
+    /// <param name="bits">The plain bits that accompany the trit; ignored when <paramref name="range"/> is 2.</param>
+    /// <param name="range">The quantization range; one of 2, 5, 11 or 23.</param>
+    /// <returns>The unquantized weight.</returns>
+    /// <exception cref="ArgumentException"><paramref name="range"/> is not a supported trit range.</exception>
+    internal static int GetUnquantizedWeight(int trit, int bits, int range)
+    {
+        if (range == 2)
+        {
+            // Pure-trit range: fixed values; any trit other than 0 or 1 maps to 63.
+            return trit switch
+            {
+                0 => 0,
+                1 => 32,
+                _ => 63
+            };
+        }
+
+        // Same a/b/c scheme as the colour path, but seven bits wide.
+        int a = (bits & 1) != 0 ? 0x7F : 0;
+        (int b, int c) = range switch
+        {
+            5 => (0, 50),
+            11 => ((bits >> 1) & 1) is var x
+                ? (x | (x << 2) | (x << 6), 23)
+                : default,
+            23 => ((bits >> 1) & 0x3) is var x
+                ? (x | (x << 5), 11)
+                : default,
+            _ => throw new ArgumentException("Illegal trit encoding")
+        };
+        int t = (trit * c) + b;
+        t ^= a;
+        return (a & 0x20) | (t >> 2);
+    }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/BlockDestination.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/BlockDestination.cs
new file mode 100644
index 00000000..c3c16387
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/BlockDestination.cs
@@ -0,0 +1,12 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// Destination pixel rectangle for one ASTC block in the output image: the top-left pixel
+/// offset, the clipped copy extents (equal to the footprint for interior blocks, smaller
+/// for right/bottom edge blocks), and a flag set when the block's full footprint fits in
+/// the image and the fused direct-to-image fast path is usable.
+/// </summary>
+internal readonly record struct BlockDestination(int DstBaseX, int DstBaseY, int CopyWidth, int CopyHeight, bool IsFullInteriorBlock);
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/BlockModeDecoder.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/BlockModeDecoder.cs
new file mode 100644
index 00000000..2040f5cd
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/BlockModeDecoder.cs
@@ -0,0 +1,396 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// Single-pass parser for the 128-bit ASTC block mode (spec §C.2.9–§C.2.11, §C.2.16). Produces a
+/// populated <see cref="BlockInfo"/> record describing the block's weight grid, partition
+/// count, colour endpoint modes, dual-plane flag, and the bit-range metadata the per-block
+/// decoders need. Reserved and illegal encodings are rejected inline (IsValid = false).
+/// </summary>
+internal static class BlockModeDecoder
+{
+    // Spec §C.2.10 Table C.2.7: weight range table indexed by (h << 3) | r[2:0]. Entries marked -1
+    // are reserved and reject the block. Two six-entry groups (low precision, high precision).
+    private static ReadOnlySpan<int> WeightRanges
+        => [-1, -1, 1, 2, 3, 4, 5, 7, -1, -1, 9, 11, 15, 19, 23, 31];
+
+    // Spec §C.2.11: extra-CEM bit count by partition count. Indexed [partitionCount - 1].
+    private static ReadOnlySpan<int> ExtraCemBitsForPartition => [0, 2, 5, 8];
+
+    // Spec §C.2.22: valid BISE endpoint ranges in descending order. The parser picks the
+    // largest that fits in the colour bit budget computed by the §C.2.22 remaining-bits
+    // procedure.
+    private static ReadOnlySpan<int> ValidEndpointRanges
+        => [255, 191, 159, 127, 95, 79, 63, 47, 39, 31, 23, 19, 15, 11, 9, 7, 5];
+
+    /// <summary>
+    /// Decodes all block-mode info from raw 128-bit ASTC block data in a single pass.
+    /// Returns a <see cref="BlockInfo"/> with IsValid = false if the block is illegal or
+    /// reserved, or with IsVoidExtent = true for void-extent blocks (spec §C.2.23).
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <returns>The parsed block description; <c>default</c> for rejected non-void-extent blocks.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    public static BlockInfo Decode(UInt128 bits)
+    {
+        ulong lowBits = bits.Low();
+
+        // Void extent: bits[0..8] == 0x1FC (9 bits). See ASTC spec §C.2.23.
+        if ((lowBits & 0x1FF) == 0x1FC)
+        {
+            // Bit 9 is the dynamic-range flag: 1 = HDR (FP16), 0 = LDR (UNORM16).
+            bool voidExtentIsHdr = (lowBits & (1UL << 9)) != 0;
+            return IsVoidExtentWellFormed(bits, lowBits)
+                ? new BlockInfo(
+                    isVoidExtent: true,
+                    isHdr: voidExtentIsHdr,
+                    weights: default,
+                    partitionCount: 0,
+                    dualPlane: default,
+                    colors: default,
+                    endpointModes: default)
+                : BlockInfo.MalformedVoidExtent;
+        }
+
+        if (!TryDecodeWeightGrid(lowBits, out int gridWidth, out int gridHeight, out uint rBits, out bool isWidthA6HeightB6))
+        {
+            return default;
+        }
+
+        if (!TryResolveWeightRange(lowBits, rBits, isWidthA6HeightB6, out int weightRange))
+        {
+            return default;
+        }
+
+        // WidthA6HeightB6 mode never has dual plane; otherwise check bit 10.
+        bool isDualPlane = !isWidthA6HeightB6 && ((lowBits >> 10) & 1) != 0;
+        int partitionCount = 1 + (int)((lowBits >> 11) & 0x3);
+
+        if (!TryComputeWeightBitCount(gridWidth, gridHeight, isDualPlane, partitionCount, weightRange, out int weightBitCount))
+        {
+            return default;
+        }
+
+        // Fixed 4 entries (max partition count per spec §C.2.10)
+        Span<ColorEndpointMode> cems = stackalloc ColorEndpointMode[4];
+        int colorValuesCount = DecodeEndpointModes(bits, lowBits, partitionCount, weightBitCount, cems, out int numExtraCEMBits);
+        if (colorValuesCount is < 0 or > 18)
+        {
+            return default;
+        }
+
+        // Dual plane and color bit positions depend on weight + extra-CEM bit allocation.
+        int dualPlaneBitStartPos = 128 - weightBitCount - numExtraCEMBits;
+        if (isDualPlane)
+        {
+            dualPlaneBitStartPos -= 2;
+        }
+
+        // -1 marks "no second plane".
+        int dualPlaneChannel = isDualPlane
+            ? (int)BitOperations.GetBits(bits, dualPlaneBitStartPos, 2).Low()
+            : -1;
+
+        // Colour data starts after the block-mode/partition header, which is longer for
+        // multi-partition blocks, and ends where the dual-plane / extra-CEM / weight bits begin.
+        int colorStartBit = (partitionCount == 1) ? 17 : 29;
+        int maxColorBits = dualPlaneBitStartPos - colorStartBit;
+
+        if (!TryFitColorRange(colorValuesCount, maxColorBits, out int colorValuesRange, out int colorBitCount))
+        {
+            return default;
+        }
+
+        BlockInfo.EndpointModeBuffer modes = default;
+        modes[0] = cems[0];
+        modes[1] = cems[1];
+        modes[2] = cems[2];
+        modes[3] = cems[3];
+
+        // The block counts as HDR as soon as any partition in use has an HDR endpoint mode.
+        bool isHdr = false;
+        for (int i = 0; i < partitionCount; i++)
+        {
+            if (cems[i].IsHdr())
+            {
+                isHdr = true;
+                break;
+            }
+        }
+
+        return new BlockInfo(
+            isVoidExtent: false,
+            isHdr: isHdr,
+            weights: new WeightGrid(gridWidth, gridHeight, weightRange, weightBitCount),
+            partitionCount,
+            dualPlane: new DualPlaneInfo(isDualPlane, dualPlaneChannel),
+            colors: new ColorEndpoints(colorStartBit, colorBitCount, colorValuesRange, colorValuesCount),
+            endpointModes: modes);
+    }
+
+ /// <summary>
+ /// Decodes the block-mode / weight-grid dimensions section of the block mode per ASTC spec
+ /// §C.2.8 Table 24. Returns false for reserved block-mode encodings.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static bool TryDecodeWeightGrid(
+ ulong lowBits,
+ out int gridWidth,
+ out int gridHeight,
+ out uint rBits,
+ out bool isWidthA6HeightB6)
+ {
+ isWidthA6HeightB6 = false;
+
+ if ((lowBits & 0x3) != 0)
+ {
+ // bits[0..1] != 0 : layout A (modeBits = bits[2..3]).
+ ulong modeBits = (lowBits >> 2) & 0x3;
+ int a = (int)((lowBits >> 5) & 0x3);
+
+ (gridWidth, gridHeight) = modeBits switch
+ {
+ 0 => ((int)((lowBits >> 7) & 0x3) + 4, a + 2),
+ 1 => ((int)((lowBits >> 7) & 0x3) + 8, a + 2),
+ 2 => (a + 2, (int)((lowBits >> 7) & 0x3) + 8),
+ 3 when ((lowBits >> 8) & 1) != 0 => ((int)((lowBits >> 7) & 0x1) + 2, a + 2),
+ 3 => (a + 2, (int)((lowBits >> 7) & 0x1) + 6),
+ _ => default // unreachable — modeBits is 2 bits wide.
+ };
+
+ // Layout A: R0 = bit 4, R1 = bit 0, R2 = bit 1; pack as rBits = R2*4 + R1*2 + R0.
+ rBits = (uint)(((lowBits >> 4) & 1) | ((lowBits & 0x3) << 1));
+ return true;
+ }
+
+ // bits[0..1] == 0 : layout B (modeBits = bits[5..8]).
+ ulong layoutBBits = (lowBits >> 5) & 0xF;
+ int aLow = (int)((lowBits >> 5) & 0x3);
+
+ switch (layoutBBits)
+ {
+ case var _ when (layoutBBits & 0xC) == 0x0:
+ if ((lowBits & 0xF) == 0)
+ {
+ // Reserved: bits[0..3] are all zero (the mask above is 0xF).
+ gridWidth = gridHeight = 0;
+ rBits = 0;
+ return false;
+ }
+
+ gridWidth = 12;
+ gridHeight = aLow + 2;
+ break;
+ case var _ when (layoutBBits & 0xC) == 0x4:
+ gridWidth = aLow + 2;
+ gridHeight = 12;
+ break;
+ case 0xC:
+ gridWidth = 6;
+ gridHeight = 10;
+ break;
+ case 0xD:
+ gridWidth = 10;
+ gridHeight = 6;
+ break;
+ case var _ when (layoutBBits & 0xC) == 0x8:
+ gridWidth = aLow + 6;
+ gridHeight = (int)((lowBits >> 9) & 0x3) + 6;
+ isWidthA6HeightB6 = true;
+ break;
+ default:
+ // Reserved block mode (layoutBBits is 0xE or 0xF).
+ gridWidth = gridHeight = 0;
+ rBits = 0;
+ return false;
+ }
+
+ // Layout B: R0 = bit 4, R1 = bit 2, R2 = bit 3; pack as rBits = R2*4 + R1*2 + R0.
+ rBits = (uint)(((lowBits >> 4) & 1) | (((lowBits >> 2) & 0x3) << 1));
+ return true;
+ }
+
+    /// <summary>
+    /// Resolves the weight range from the 3-bit r selector combined with the high-precision
+    /// h bit (ASTC spec §C.2.7 Table 23).
+    /// </summary>
+    /// <param name="lowBits">Low 64 bits of the block; bit 9 supplies h.</param>
+    /// <param name="rBits">The packed r selector.</param>
+    /// <param name="isWidthA6HeightB6">When set, h is treated as 0 and bit 9 is not read.</param>
+    /// <param name="weightRange">The table entry that was looked up, or 0 when the index is out of bounds.</param>
+    /// <returns><c>false</c> when the index is out of bounds or points at a reserved (-1) slot.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static bool TryResolveWeightRange(ulong lowBits, uint rBits, bool isWidthA6HeightB6, out int weightRange)
+    {
+        weightRange = 0;
+
+        // h selects the second half of the table.
+        uint tableIndex = rBits;
+        if (!isWidthA6HeightB6 && ((lowBits >> 9) & 1) != 0)
+        {
+            tableIndex |= 1u << 3;
+        }
+
+        if (tableIndex >= (uint)WeightRanges.Length)
+        {
+            return false;
+        }
+
+        int candidate = WeightRanges[(int)tableIndex];
+        weightRange = candidate;
+        return candidate >= 0;
+    }
+
+    /// <summary>
+    /// Checks the weight-count constraints and computes the weight bit count per ASTC spec
+    /// §C.2.11. A block is rejected when it has more than 64 weights, combines four
+    /// partitions with dual plane, or needs a weight bit total outside [24, 96].
+    /// </summary>
+    /// <param name="gridWidth">Weight grid width.</param>
+    /// <param name="gridHeight">Weight grid height.</param>
+    /// <param name="isDualPlane">Whether a second weight plane is present (doubles the weight count).</param>
+    /// <param name="partitionCount">Number of partitions in the block.</param>
+    /// <param name="weightRange">The weight quantization range.</param>
+    /// <param name="weightBitCount">The computed bit count, or 0 when a weight-count check failed.</param>
+    /// <returns><c>true</c> when every constraint holds.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static bool TryComputeWeightBitCount(
+        int gridWidth,
+        int gridHeight,
+        bool isDualPlane,
+        int partitionCount,
+        int weightRange,
+        out int weightBitCount)
+    {
+        int planeCount = isDualPlane ? 2 : 1;
+        int weightCount = gridWidth * gridHeight * planeCount;
+
+        bool tooManyWeights = weightCount > 64;
+
+        // 4 partitions + dual plane is illegal per spec §C.2.11.
+        bool illegalDualPlane = isDualPlane && partitionCount == 4;
+
+        if (tooManyWeights || illegalDualPlane)
+        {
+            weightBitCount = 0;
+            return false;
+        }
+
+        weightBitCount = BoundedIntegerSequenceCodec.GetBitCountForRange(weightCount, weightRange);
+        return weightBitCount >= 24 && weightBitCount <= 96;
+    }
+
+    /// <summary>
+    /// Decodes per-partition colour endpoint modes per ASTC spec §C.2.11 and returns the total
+    /// colour-values count. The shared-CEM and non-shared-CEM paths both populate
+    /// <paramref name="cems"/> (length 4) and tell the caller how many extra CEM bits were
+    /// consumed, which affects subsequent bit layout.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="lowBits">Low 64 bits of <paramref name="bits"/>.</param>
+    /// <param name="partitionCount">Number of partitions (1..4).</param>
+    /// <param name="weightBitCount">Size of the weight data; the extra CEM bits sit directly below it.</param>
+    /// <param name="cems">Receives one mode per partition; entries past <paramref name="partitionCount"/> are not written.</param>
+    /// <param name="numExtraCEMBits">Extra CEM bits consumed; 0 for single-partition and shared-CEM blocks.</param>
+    /// <returns>The sum of the colour value counts of all partitions.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static int DecodeEndpointModes(
+        UInt128 bits,
+        ulong lowBits,
+        int partitionCount,
+        int weightBitCount,
+        Span<ColorEndpointMode> cems,
+        out int numExtraCEMBits)
+    {
+        numExtraCEMBits = 0;
+
+        if (partitionCount == 1)
+        {
+            ColorEndpointMode mode = (ColorEndpointMode)((lowBits >> 13) & 0xF);
+            cems[0] = mode;
+            return mode.GetColorValuesCount();
+        }
+
+        // Multi-partition: either shared CEM (marker 0) or per-partition (non-zero marker).
+        ulong sharedCemMarker = (lowBits >> 23) & 0x3;
+        if (sharedCemMarker == 0)
+        {
+            ColorEndpointMode sharedCem = (ColorEndpointMode)((lowBits >> 25) & 0xF);
+            int colorValuesCount = 0;
+            for (int i = 0; i < partitionCount; i++)
+            {
+                cems[i] = sharedCem;
+                colorValuesCount += sharedCem.GetColorValuesCount();
+            }
+
+            return colorValuesCount;
+        }
+
+        numExtraCEMBits = ExtraCemBitsForPartition[partitionCount - 1];
+
+        int extraCemStartPos = 128 - numExtraCEMBits - weightBitCount;
+        UInt128 extraCem = BitOperations.GetBits(bits, extraCemStartPos, numExtraCEMBits);
+
+        // The marker (1..3) picks the base mode class; the four header bits above it are
+        // concatenated with the extra CEM bits to form the per-partition selector stream.
+        ulong cemval = (lowBits >> 23) & 0x3F;
+        int baseCem = (int)(((cemval & 0x3) - 1) * 4);
+        cemval >>= 2;
+        ulong cembits = cemval | (extraCem.Low() << 4);
+
+        // 1 selector bit per partition (c[i]), then 2 mode bits per partition (m).
+        // Fixed 4 ints (16 bytes) — max partition count per spec §C.2.10.
+        Span<int> c = stackalloc int[4];
+        for (int i = 0; i < partitionCount; i++)
+        {
+            c[i] = (int)(cembits & 0x1);
+            cembits >>= 1;
+        }
+
+        int total = 0;
+        for (int i = 0; i < partitionCount; i++)
+        {
+            int m = (int)(cembits & 0x3);
+            cembits >>= 2;
+            ColorEndpointMode mode = (ColorEndpointMode)(baseCem + (4 * c[i]) + m);
+            cems[i] = mode;
+            total += mode.GetColorValuesCount();
+        }
+
+        return total;
+    }
+
+    /// <summary>
+    /// Picks the largest valid BISE endpoint range whose encoding fits within
+    /// <paramref name="maxColorBits"/> per ASTC spec §C.2.22.
+    /// </summary>
+    /// <param name="colorValuesCount">Number of colour values to encode.</param>
+    /// <param name="maxColorBits">Bit budget available for colour data.</param>
+    /// <param name="colorValuesRange">The chosen range, or 0 on failure.</param>
+    /// <param name="colorBitCount">Bits used by the chosen range, or 0 on failure.</param>
+    /// <returns><c>false</c> if even the minimum encoding exceeds the budget or no range fits.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static bool TryFitColorRange(
+        int colorValuesCount,
+        int maxColorBits,
+        out int colorValuesRange,
+        out int colorBitCount)
+    {
+        colorValuesRange = 0;
+        colorBitCount = 0;
+
+        // Spec §C.2.22 minimum: 13 bits per 5 color values, rounded up — derived from
+        // the smallest valid BISE encoding (range 5 = 1 trit + 1 bit, i.e. 8/5 + 1 ≈ 13/5
+        // bits per value).
+        int minimumBits = ((13 * colorValuesCount) + 4) / 5;
+        if (maxColorBits >= minimumBits)
+        {
+            // The table is in descending order, so the first range that fits is the largest.
+            foreach (int candidate in ValidEndpointRanges)
+            {
+                int needed = BoundedIntegerSequenceCodec.GetBitCountForRange(colorValuesCount, candidate);
+                if (needed > maxColorBits)
+                {
+                    continue;
+                }
+
+                colorValuesRange = candidate;
+                colorBitCount = needed;
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Validates a void-extent block per ASTC spec §C.2.23: reserved bits 10..11 must be 0x3,
+    /// and the four 13-bit texel coordinates must either all be all-ones (the sentinel for
+    /// "no constraint") or form two [min, max] pairs with min &lt; max.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="lowBits">Low 64 bits of <paramref name="bits"/>; holds all four coordinates.</param>
+    /// <returns><c>true</c> when the block passes both checks.</returns>
+    private static bool IsVoidExtentWellFormed(UInt128 bits, ulong lowBits)
+    {
+        bool reservedBitsSet = BitOperations.GetBits(bits, 10, 2).Low() == 0x3UL;
+        if (!reservedBitsSet)
+        {
+            return false;
+        }
+
+        const int NoConstraint = (1 << 13) - 1;
+
+        int firstMin = (int)BitOperations.GetBits(lowBits, 12, 13);
+        int firstMax = (int)BitOperations.GetBits(lowBits, 25, 13);
+        int secondMin = (int)BitOperations.GetBits(lowBits, 38, 13);
+        int secondMax = (int)BitOperations.GetBits(lowBits, 51, 13);
+
+        if (firstMin == NoConstraint && firstMax == NoConstraint && secondMin == NoConstraint && secondMax == NoConstraint)
+        {
+            return true;
+        }
+
+        return firstMin < firstMax && secondMin < secondMax;
+    }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedBlockDecoder.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedBlockDecoder.cs
new file mode 100644
index 00000000..ecbae6f3
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedBlockDecoder.cs
@@ -0,0 +1,143 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// Shared decode core for the fused (zero-allocation) ASTC block decode pipeline.
+/// Contains BISE extraction and weight infill used by both LDR and HDR decoders.
+/// </summary>
+internal static class FusedBlockDecoder
+{
+    /// <summary>
+    /// Shared decode core for the fused fast paths. Performs the per-block stages described
+    /// in ASTC spec §C.2.7 (overall block decode procedure) in one inlined sweep:
+    /// BISE decode the colour values (§C.2.12) and unquantize them (§C.2.13), decode the
+    /// endpoint pair (§C.2.14), BISE decode the weights (§C.2.12), unquantize them (§C.2.17),
+    /// and infill from the weight grid to the texel grid (§C.2.18). Populates
+    /// <paramref name="texelWeights"/> and returns the decoded endpoint pair.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="info">Parsed block mode; only the first endpoint mode is used.</param>
+    /// <param name="footprint">The block footprint.</param>
+    /// <param name="texelWeights">Receives one weight per texel of the footprint.</param>
+    /// <returns>The decoded endpoint pair.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    internal static ColorEndpointPair DecodeFusedCore(
+        UInt128 bits, in BlockInfo info, Footprint footprint, Span<int> texelWeights)
+    {
+        // 1. BISE decode color endpoint values.
+        // Single-partition fused path: up to 8 ints (32 bytes) — single-mode CEM caps values at 8.
+        int colorCount = info.EndpointMode0.GetColorValuesCount();
+        Span<int> colors = stackalloc int[colorCount];
+        DecodeBiseValues(bits, info.Colors.StartBit, info.Colors.BitCount, info.Colors.Range, colorCount, colors);
+
+        // 2. Batch unquantize color values, then decode endpoint pair
+        Quantization.UnquantizeCEValuesBatch(colors, info.Colors.Range);
+        ColorEndpointPair endpointPair = EndpointCodec.Decode(colors, info.EndpointMode0);
+
+        // 3. BISE decode weights.
+        // Up to 64 ints (256 bytes) — spec §C.2.11 caps single-plane gridSize at 64.
+        int gridSize = info.Weights.Width * info.Weights.Height;
+        Span<int> gridWeights = stackalloc int[gridSize];
+        DecodeBiseWeights(bits, info.Weights.BitCount, info.Weights.Range, gridSize, gridWeights);
+
+        // 4. Batch unquantize weights
+        Quantization.UnquantizeWeightsBatch(gridWeights, info.Weights.Range);
+
+        // 5. Infill weights from grid to texels (or pass through if identity mapping)
+        if (info.Weights.Width == footprint.Width && info.Weights.Height == footprint.Height)
+        {
+            gridWeights[..footprint.PixelCount].CopyTo(texelWeights);
+        }
+        else
+        {
+            DecimationInfo decimationInfo = DecimationTable.Get(footprint, info.Weights.Width, info.Weights.Height);
+            DecimationTable.InfillWeights(gridWeights, decimationInfo, texelWeights);
+        }
+
+        return endpointPair;
+    }
+
+    /// <summary>
+    /// Decodes BISE-encoded (ASTC spec §C.2.12) colour endpoint values from the specified
+    /// bit region of the block.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="startBit">Index of the first bit of the colour data.</param>
+    /// <param name="bitCount">Length of the colour data in bits.</param>
+    /// <param name="range">The BISE range of the values.</param>
+    /// <param name="valuesCount">Number of values to decode.</param>
+    /// <param name="result">Receives the decoded values.</param>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    internal static void DecodeBiseValues(UInt128 bits, int startBit, int bitCount, int range, int valuesCount, Span<int> result)
+    {
+        // Shift the region down to bit 0 and drop everything above it.
+        UInt128 source = (bits >> startBit) & UInt128Extensions.OnesMask(bitCount);
+        DecodeBiseSequence(source, range, valuesCount, result);
+    }
+
+    /// <summary>
+    /// Decodes BISE-encoded (ASTC spec §C.2.12) weights from the reversed high-end of the
+    /// block. Weight data is stored MSB-first at the top of the 128-bit block, so the bits
+    /// are reversed before decode so the BISE reader can consume them in normal LSB-first
+    /// order.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="weightBitCount">Length of the weight data in bits.</param>
+    /// <param name="weightRange">The BISE range of the weights.</param>
+    /// <param name="count">Number of weights to decode.</param>
+    /// <param name="result">Receives the decoded weights.</param>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    internal static void DecodeBiseWeights(UInt128 bits, int weightBitCount, int weightRange, int count, Span<int> result)
+    {
+        UInt128 source = UInt128Extensions.ReverseBits(bits) & UInt128Extensions.OnesMask(weightBitCount);
+        DecodeBiseSequence(source, weightRange, count, result);
+    }
+
+    /// <summary>
+    /// Decodes a BISE sequence from bits pre-normalised to start at bit 0.
+    /// For bit-only encoding, extracts values directly via shifts (no BitStream).
+    /// Trit/quint encodings fall back to the full BISE decoder.
+    /// </summary>
+    /// <param name="source">The encoded bits, starting at bit 0.</param>
+    /// <param name="range">The BISE range of the values.</param>
+    /// <param name="count">Number of values to decode.</param>
+    /// <param name="result">Receives the decoded values.</param>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static void DecodeBiseSequence(UInt128 source, int range, int count, Span<int> result)
+    {
+        // Range is in [1, 255] by construction — BlockInfo's ColorValuesRange/WeightRange come
+        // from BlockModeDecoder's spec-bound tables, so skip the redundant per-block bounds check.
+        (BiseEncodingMode encMode, int bitsPerValue) = BoundedIntegerSequenceCodec.GetPackingModeBitCountUnchecked(range);
+
+        if (encMode != BiseEncodingMode.BitEncoding)
+        {
+            BitStream stream = new(source, 128);
+            BoundedIntegerSequenceDecoder.Decode(encMode, bitsPerValue, count, ref stream, result);
+            return;
+        }
+
+        ulong mask = (1UL << bitsPerValue) - 1;
+        ulong lowBits = source.Low();
+        int totalBits = count * bitsPerValue;
+
+        // Fast path: everything lives in the low 64 bits.
+        if (totalBits <= 64)
+        {
+            for (int i = 0; i < count; i++)
+            {
+                result[i] = (int)(lowBits & mask);
+                lowBits >>= bitsPerValue;
+            }
+
+            return;
+        }
+
+        ulong highBits = source.High();
+        int bitPos = 0;
+        for (int i = 0; i < count; i++)
+        {
+            if (bitPos < 64)
+            {
+                ulong val = (lowBits >> bitPos) & mask;
+                if (bitPos + bitsPerValue > 64)
+                {
+                    // The value straddles the 64-bit boundary: take its upper part from the high word.
+                    val |= (highBits << (64 - bitPos)) & mask;
+                }
+
+                result[i] = (int)val;
+            }
+            else
+            {
+                result[i] = (int)((highBits >> (bitPos - 64)) & mask);
+            }
+
+            bitPos += bitsPerValue;
+        }
+    }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedHdrBlockDecoder.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedHdrBlockDecoder.cs
new file mode 100644
index 00000000..ef0a3693
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedHdrBlockDecoder.cs
@@ -0,0 +1,163 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// HDR pixel writers and entry points for the fused decode pipeline.
+/// All methods handle single-partition, non-dual-plane blocks.
+/// </summary>
+internal static class FusedHdrBlockDecoder
+{
+    /// <summary>
+    /// Fused HDR decode to a contiguous float buffer.
+    /// Handles single-partition, non-dual-plane blocks with both LDR and HDR endpoints.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="info">Parsed block mode.</param>
+    /// <param name="footprint">The block footprint.</param>
+    /// <param name="buffer">Receives the footprint's pixels, tightly packed row by row.</param>
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    internal static void DecompressBlockFusedHdr(UInt128 bits, in BlockInfo info, Footprint footprint, Span<float> buffer)
+        => DecompressBlock(
+            bits,
+            in info,
+            footprint,
+            buffer,
+            dstBaseX: 0,
+            dstBaseY: 0,
+            dstRowStride: footprint.Width * BlockInfo.ChannelsPerPixel);
+
+    /// <summary>
+    /// Fused HDR decode writing directly to image buffer at strided positions.
+    /// Handles single-partition, non-dual-plane blocks with both LDR and HDR endpoints.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="info">Parsed block mode.</param>
+    /// <param name="footprint">The block footprint.</param>
+    /// <param name="dstBaseX">X pixel offset of the block within the image.</param>
+    /// <param name="dstBaseY">Y pixel offset of the block within the image.</param>
+    /// <param name="imageWidth">Image width in pixels; determines the row stride.</param>
+    /// <param name="imageBuffer">The destination image buffer. The whole footprint is written.</param>
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    internal static void DecompressBlockFusedHdrToImage(
+        UInt128 bits,
+        in BlockInfo info,
+        Footprint footprint,
+        int dstBaseX,
+        int dstBaseY,
+        int imageWidth,
+        Span<float> imageBuffer)
+        => DecompressBlock(
+            bits,
+            in info,
+            footprint,
+            imageBuffer,
+            dstBaseX,
+            dstBaseY,
+            dstRowStride: imageWidth * BlockInfo.ChannelsPerPixel);
+
+    /// <summary>
+    /// Decodes the block's endpoints and per-texel weights, then writes every pixel of the
+    /// footprint using the HDR or LDR-as-HDR writer, depending on the endpoint pair.
+    /// </summary>
+    /// <param name="bits">The raw 128-bit block.</param>
+    /// <param name="info">Parsed block mode.</param>
+    /// <param name="footprint">The block footprint.</param>
+    /// <param name="buffer">The destination buffer.</param>
+    /// <param name="dstBaseX">X pixel offset of the block in the destination.</param>
+    /// <param name="dstBaseY">Y pixel offset of the block in the destination.</param>
+    /// <param name="dstRowStride">Destination row stride, in buffer elements.</param>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static void DecompressBlock(
+        UInt128 bits,
+        in BlockInfo info,
+        Footprint footprint,
+        Span<float> buffer,
+        int dstBaseX,
+        int dstBaseY,
+        int dstRowStride)
+    {
+        // Up to 12×12 = 144 ints (576 bytes) for the largest 2D footprint per spec §C.2.4.
+        Span<int> texelWeights = stackalloc int[footprint.PixelCount];
+        ColorEndpointPair endpointPair = FusedBlockDecoder.DecodeFusedCore(bits, in info, footprint, texelWeights);
+
+        if (endpointPair.IsHdr)
+        {
+            WriteHdrPixels(buffer, footprint, dstBaseX, dstBaseY, dstRowStride, in endpointPair, texelWeights);
+        }
+        else
+        {
+            WriteLdrAsHdrPixels(buffer, footprint, dstBaseX, dstBaseY, dstRowStride, in endpointPair, texelWeights);
+        }
+    }
+
+    /// <summary>
+    /// Interpolates LDR endpoints with the per-texel weights and writes each channel of
+    /// every footprint pixel as a float.
+    /// </summary>
+    /// <param name="buffer">The destination buffer.</param>
+    /// <param name="footprint">The block footprint; every texel of it is written.</param>
+    /// <param name="dstBaseX">X pixel offset of the block in the destination.</param>
+    /// <param name="dstBaseY">Y pixel offset of the block in the destination.</param>
+    /// <param name="dstRowStride">Destination row stride, in buffer elements.</param>
+    /// <param name="endpointPair">The decoded endpoints; only the LDR pair is read.</param>
+    /// <param name="texelWeights">One weight per texel, row-major over the footprint.</param>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static void WriteLdrAsHdrPixels(
+        Span<float> buffer,
+        Footprint footprint,
+        int dstBaseX,
+        int dstBaseY,
+        int dstRowStride,
+        in ColorEndpointPair endpointPair,
+        Span<int> texelWeights)
+    {
+        // Hoist the endpoint channels into locals so the inner loop does not re-read the struct.
+        int lowR = endpointPair.LdrLow.R, lowG = endpointPair.LdrLow.G, lowB = endpointPair.LdrLow.B, lowA = endpointPair.LdrLow.A;
+        int highR = endpointPair.LdrHigh.R, highG = endpointPair.LdrHigh.G, highB = endpointPair.LdrHigh.B, highA = endpointPair.LdrHigh.A;
+
+        int footprintWidth = footprint.Width;
+        int footprintHeight = footprint.Height;
+
+        for (int pixelY = 0; pixelY < footprintHeight; pixelY++)
+        {
+            int dstRowOffset = ((dstBaseY + pixelY) * dstRowStride) + (dstBaseX * BlockInfo.ChannelsPerPixel);
+            int srcRowBase = pixelY * footprintWidth;
+
+            for (int pixelX = 0; pixelX < footprintWidth; pixelX++)
+            {
+                int weight = texelWeights[srcRowBase + pixelX];
+                int dstOffset = dstRowOffset + (pixelX * BlockInfo.ChannelsPerPixel);
+                buffer[dstOffset + 0] = InterpolateLdrAsFloat(lowR, highR, weight);
+                buffer[dstOffset + 1] = InterpolateLdrAsFloat(lowG, highG, weight);
+                buffer[dstOffset + 2] = InterpolateLdrAsFloat(lowB, highB, weight);
+                buffer[dstOffset + 3] = InterpolateLdrAsFloat(lowA, highA, weight);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Interpolates HDR endpoints with the per-texel weights and writes each channel of
+    /// every footprint pixel as a float. Alpha is interpolated as a UNORM16 value when the
+    /// endpoint pair flags it as LDR.
+    /// </summary>
+    /// <param name="buffer">The destination buffer.</param>
+    /// <param name="footprint">The block footprint; every texel of it is written.</param>
+    /// <param name="dstBaseX">X pixel offset of the block in the destination.</param>
+    /// <param name="dstBaseY">Y pixel offset of the block in the destination.</param>
+    /// <param name="dstRowStride">Destination row stride, in buffer elements.</param>
+    /// <param name="endpointPair">The decoded endpoints; only the HDR pair is read.</param>
+    /// <param name="texelWeights">One weight per texel, row-major over the footprint.</param>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static void WriteHdrPixels(
+        Span<float> buffer,
+        Footprint footprint,
+        int dstBaseX,
+        int dstBaseY,
+        int dstRowStride,
+        in ColorEndpointPair endpointPair,
+        Span<int> texelWeights)
+    {
+        bool alphaIsLdr = endpointPair.AlphaIsLdr;
+
+        // Hoist the endpoint channels into locals so the inner loop does not re-read the struct.
+        int lowR = endpointPair.HdrLow.R, lowG = endpointPair.HdrLow.G, lowB = endpointPair.HdrLow.B, lowA = endpointPair.HdrLow.A;
+        int highR = endpointPair.HdrHigh.R, highG = endpointPair.HdrHigh.G, highB = endpointPair.HdrHigh.B, highA = endpointPair.HdrHigh.A;
+
+        int footprintWidth = footprint.Width;
+        int footprintHeight = footprint.Height;
+
+        for (int pixelY = 0; pixelY < footprintHeight; pixelY++)
+        {
+            int dstRowOffset = ((dstBaseY + pixelY) * dstRowStride) + (dstBaseX * BlockInfo.ChannelsPerPixel);
+            int srcRowBase = pixelY * footprintWidth;
+
+            for (int pixelX = 0; pixelX < footprintWidth; pixelX++)
+            {
+                int weight = texelWeights[srcRowBase + pixelX];
+                int dstOffset = dstRowOffset + (pixelX * BlockInfo.ChannelsPerPixel);
+                buffer[dstOffset + 0] = InterpolateHdrAsFloat(lowR, highR, weight);
+                buffer[dstOffset + 1] = InterpolateHdrAsFloat(lowG, highG, weight);
+                buffer[dstOffset + 2] = InterpolateHdrAsFloat(lowB, highB, weight);
+
+                if (alphaIsLdr)
+                {
+                    // Mode 14 (ASTC spec §C.2.14): alpha is a UNORM16 value interpolated like LDR.
+                    buffer[dstOffset + 3] = Interpolation.Unorm16ToFloat(Interpolation.BlendWeighted(lowA, highA, weight));
+                }
+                else
+                {
+                    buffer[dstOffset + 3] = InterpolateHdrAsFloat(lowA, highA, weight);
+                }
+            }
+        }
+    }
+
+    /// <summary>
+    /// Blends two LDR endpoint channels with <paramref name="weight"/> and converts the
+    /// result to a float via the UNORM16 conversion.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static float InterpolateLdrAsFloat(int p0, int p1, int weight)
+    {
+        var blended = Interpolation.BlendLdrReplicated(p0, p1, weight);
+        return Interpolation.Unorm16ToFloat(blended);
+    }
+
+    /// <summary>
+    /// Blends two HDR endpoint channels with <paramref name="weight"/>, limits the result
+    /// to [0, 0xFFFF] and converts it to a float via the LNS conversion.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static float InterpolateHdrAsFloat(int p0, int p1, int weight)
+    {
+        int blended = Interpolation.BlendWeighted(p0, p1, weight);
+        int limited = Math.Min(Math.Max(blended, 0), 0xFFFF);
+        return Fp16.LnsToFloat(limited);
+    }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedLdrBlockDecoder.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedLdrBlockDecoder.cs
new file mode 100644
index 00000000..1d283dbc
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/FusedLdrBlockDecoder.cs
@@ -0,0 +1,140 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// LDR pixel writers and entry points for the fused decode pipeline.
+/// All methods handle single-partition, non-dual-plane blocks.
+/// </summary>
+internal static class FusedLdrBlockDecoder
+{
+ /// <summary>
+ /// Fused LDR decode to a contiguous buffer.
+ /// Only handles single-partition, non-dual-plane, LDR blocks.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+ internal static void DecompressBlockFusedLdr(UInt128 bits, in BlockInfo info, Footprint footprint, Span buffer)
+ => DecompressBlock(
+ bits,
+ in info,
+ footprint,
+ buffer,
+ dstBaseX: 0,
+ dstBaseY: 0,
+ dstRowStride: footprint.Width * BlockInfo.ChannelsPerPixel);
+
+ /// <summary>
+ /// Fused LDR decode writing directly to image buffer at strided positions.
+ /// Only handles single-partition, non-dual-plane, LDR blocks.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+ internal static void DecompressBlockFusedLdrToImage(
+ UInt128 bits,
+ in BlockInfo info,
+ Footprint footprint,
+ int dstBaseX,
+ int dstBaseY,
+ int imageWidth,
+ Span imageBuffer)
+ => DecompressBlock(
+ bits,
+ in info,
+ footprint,
+ imageBuffer,
+ dstBaseX,
+ dstBaseY,
+ dstRowStride: imageWidth * BlockInfo.ChannelsPerPixel);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void DecompressBlock(
+ UInt128 bits,
+ in BlockInfo info,
+ Footprint footprint,
+ Span buffer,
+ int dstBaseX,
+ int dstBaseY,
+ int dstRowStride)
+ {
+ // Up to 12×12 = 144 ints (576 bytes) for the largest 2D footprint per spec §C.2.4.
+ Span texelWeights = stackalloc int[footprint.PixelCount];
+ ColorEndpointPair endpointPair = FusedBlockDecoder.DecodeFusedCore(bits, in info, footprint, texelWeights);
+ WriteLdrPixels(buffer, footprint, dstBaseX, dstBaseY, dstRowStride, in endpointPair, texelWeights);
+ }
+
+ /// <summary>
+ /// Writes a footprint-sized block of LDR pixels into <paramref name="buffer"/> at position
+ /// (<paramref name="dstBaseX"/>, <paramref name="dstBaseY"/>) with the given row stride.
+ /// Uses SIMD where hardware-accelerated; scalar otherwise.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void WriteLdrPixels(
+ Span buffer,
+ Footprint footprint,
+ int dstBaseX,
+ int dstBaseY,
+ int dstRowStride,
+ in ColorEndpointPair endpointPair,
+ Span texelWeights)
+ {
+ int lowR = endpointPair.LdrLow.R, lowG = endpointPair.LdrLow.G, lowB = endpointPair.LdrLow.B, lowA = endpointPair.LdrLow.A;
+ int highR = endpointPair.LdrHigh.R, highG = endpointPair.LdrHigh.G, highB = endpointPair.LdrHigh.B, highA = endpointPair.LdrHigh.A;
+
+ int footprintWidth = footprint.Width;
+ int footprintHeight = footprint.Height;
+
+ for (int pixelY = 0; pixelY < footprintHeight; pixelY++)
+ {
+ int dstRowOffset = ((dstBaseY + pixelY) * dstRowStride) + (dstBaseX * BlockInfo.ChannelsPerPixel);
+ int srcRowBase = pixelY * footprintWidth;
+ int pixelX = 0;
+
+ if (Vector128.IsHardwareAccelerated)
+ {
+ int limit = footprintWidth - 3; // the vector loop only runs while four texels remain in the row
+ for (; pixelX < limit; pixelX += 4)
+ {
+ int texelIndex = srcRowBase + pixelX;
+ Vector128 weights = Vector128.Create(
+ texelWeights[texelIndex],
+ texelWeights[texelIndex + 1],
+ texelWeights[texelIndex + 2],
+ texelWeights[texelIndex + 3]);
+ SimdHelpers.Write4PixelLdr(
+ buffer,
+ dstRowOffset + (pixelX * BlockInfo.ChannelsPerPixel),
+ lowR,
+ lowG,
+ lowB,
+ lowA,
+ highR,
+ highG,
+ highB,
+ highA,
+ weights);
+ }
+ }
+
+ for (; pixelX < footprintWidth; pixelX++) // scalar path: row remainder, or the whole row without SIMD
+ {
+ SimdHelpers.WriteSinglePixelLdr(
+ buffer,
+ dstRowOffset + (pixelX * BlockInfo.ChannelsPerPixel),
+ lowR,
+ lowG,
+ lowB,
+ lowA,
+ highR,
+ highG,
+ highB,
+ highA,
+ texelWeights[srcRowBase + pixelX]);
+ }
+ }
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/HdrPipeline.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/HdrPipeline.cs
new file mode 100644
index 00000000..df115c64
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/HdrPipeline.cs
@@ -0,0 +1,72 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// <see cref="IBlockPipeline{T}"/> implementation for the HDR (float RGBA) decode profile
+/// (ASTC spec §C.2.5 "HDR Mode"). Accepts both HDR and LDR endpoint modes — LDR endpoints
+/// widen to the [0,1] float range; HDR endpoint modes (2, 3, 7, 11, 14, 15 per §C.2.14)
+/// decode through LNS → FP16 per §C.2.15.
+/// </summary>
+internal readonly struct HdrPipeline : IBlockPipeline
+{
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool IsBlockLegal(in BlockInfo info) => true;
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void WriteErrorColor(Footprint footprint, Span buffer)
+ => FillMagenta(buffer[..(footprint.PixelCount * BlockInfo.ChannelsPerPixel)]);
+
+ /// <inheritdoc/>
+ public void WriteErrorColorClipped(
+ Footprint footprint,
+ int dstBaseX,
+ int dstBaseY,
+ int copyWidth,
+ int copyHeight,
+ int imageWidth,
+ Span imageBuffer)
+ {
+ int rowElements = copyWidth * BlockInfo.ChannelsPerPixel;
+ for (int pixelY = 0; pixelY < copyHeight; pixelY++)
+ {
+ int dstOffset = (((dstBaseY + pixelY) * imageWidth) + dstBaseX) * BlockInfo.ChannelsPerPixel;
+ FillMagenta(imageBuffer.Slice(dstOffset, rowElements));
+ }
+ }
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void FusedToImage(UInt128 blockBits, in BlockInfo info, Footprint footprint, int dstBaseX, int dstBaseY, int imageWidth, Span imageBuffer)
+ => FusedHdrBlockDecoder.DecompressBlockFusedHdrToImage(blockBits, in info, footprint, dstBaseX, dstBaseY, imageWidth, imageBuffer);
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void FusedToScratch(UInt128 blockBits, in BlockInfo info, Footprint footprint, Span decodedPixels)
+ => FusedHdrBlockDecoder.DecompressBlockFusedHdr(blockBits, in info, footprint, decodedPixels);
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void LogicalWrite(UInt128 blockBits, in BlockInfo info, Footprint footprint, Span decodedPixels)
+ => LogicalBlock.DecodeToFloats(blockBits, in info, footprint, decodedPixels);
+
+ /// <summary>
+ /// Spec §C.2.19 error colour: opaque magenta in the float profile — (1, 0, 1, 1).
+ /// </summary>
+ private static void FillMagenta(Span buffer)
+ {
+ for (int i = 0; i < buffer.Length; i += BlockInfo.ChannelsPerPixel)
+ {
+ buffer[i] = 1f;
+ buffer[i + 1] = 0f;
+ buffer[i + 2] = 1f;
+ buffer[i + 3] = 1f;
+ }
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/HdrPixelWriter.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/HdrPixelWriter.cs
new file mode 100644
index 00000000..f87913e0
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/HdrPixelWriter.cs
@@ -0,0 +1,126 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// HDR <see cref="IPixelWriter{T}"/> — writes float RGBA. Handles both LDR and HDR endpoint
+/// modes and the mode-14 LDR-alpha hybrid (ASTC spec §C.2.14, §C.2.15, §C.2.23).
+/// </summary>
+internal readonly struct HdrPixelWriter : IPixelWriter
+{
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void WritePixel(Span buffer, int offset, in ColorEndpointPair endpoint, int weight)
+ => WriteChannels(buffer.Slice(offset, 4), in endpoint, weight, dualPlane: null); // null dualPlane: every channel uses weight
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void WritePixelDualPlane(
+ Span buffer,
+ int offset,
+ in ColorEndpointPair endpoint,
+ int primaryWeight,
+ int dualPlaneChannel,
+ int dualPlaneWeight)
+ => WriteChannels(
+ buffer.Slice(offset, 4),
+ in endpoint,
+ primaryWeight,
+ dualPlane: new DualPlanePixel(dualPlaneChannel, dualPlaneWeight));
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void WriteChannels(
+ Span pixel,
+ in ColorEndpointPair endpoint,
+ int weight,
+ DualPlanePixel? dualPlane)
+ {
+ if (endpoint.IsHdr)
+ {
+ WriteHdrChannels(pixel, in endpoint, weight, dualPlane);
+ }
+ else
+ {
+ WriteLdrAsHdrChannels(pixel, in endpoint, weight, dualPlane);
+ }
+ }
+
+ /// <summary>
+ /// Writes the four HDR-endpoint channels for a single pixel per ASTC spec §C.2.15: LNS →
+ /// FP16 → float. Mode 14 alpha is LDR-as-UNORM16 (§C.2.14); HDR void-extent values are
+ /// already FP16 bit patterns (§C.2.23) and skip the LNS conversion.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void WriteHdrChannels(
+ Span pixel,
+ in ColorEndpointPair endpoint,
+ int weight,
+ DualPlanePixel? dualPlane)
+ {
+ bool alphaIsLdr = endpoint.AlphaIsLdr;
+ bool valuesAreLns = endpoint.ValuesAreLns;
+ for (int channel = 0; channel < 4; ++channel)
+ {
+ int channelWeight = ChannelWeight(channel, weight, dualPlane);
+ ushort interpolated = Interpolation.BlendWeightedAsUnorm16(
+ endpoint.HdrLow.GetChannel(channel),
+ endpoint.HdrHigh.GetChannel(channel),
+ channelWeight);
+
+ if (channel == 3 && alphaIsLdr)
+ {
+ // Mode 14 (spec §C.2.14): alpha is UNORM16, normalise directly.
+ pixel[channel] = interpolated / 65535.0f;
+ }
+ else if (valuesAreLns)
+ {
+ // Normal HDR block (spec §C.2.15): LNS → FP16 → float.
+ pixel[channel] = Fp16.LnsToFloat(interpolated);
+ }
+ else
+ {
+ // Void-extent HDR (spec §C.2.23): values are already FP16 bit patterns.
+ pixel[channel] = Fp16.Fp16ToFloat(interpolated);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Writes the four LDR-endpoint channels for a single pixel as HDR floats: UNORM16 → [0,1].
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void WriteLdrAsHdrChannels(
+ Span pixel,
+ in ColorEndpointPair endpoint,
+ int weight,
+ DualPlanePixel? dualPlane)
+ {
+ for (int channel = 0; channel < 4; ++channel)
+ {
+ int channelWeight = ChannelWeight(channel, weight, dualPlane);
+ ushort unorm16 = Interpolation.BlendLdrReplicatedAsUnorm16(
+ endpoint.LdrLow.GetChannel(channel),
+ endpoint.LdrHigh.GetChannel(channel),
+ channelWeight);
+ pixel[channel] = unorm16 / 65535.0f;
+ }
+ }
+
+ /// <summary>
+ /// Returns <paramref name="primary"/> for ordinary channels and the dual-plane secondary
+ /// weight only on the channel named in <paramref name="dualPlane"/>. Single-plane callers
+ /// pass <c>null</c>.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int ChannelWeight(int channel, int primary, DualPlanePixel? dualPlane)
+ => dualPlane is { } dp && channel == dp.Channel ? dp.Weight : primary;
+
+ /// <summary>
+ /// Per-pixel description of the dual-plane override for a single texel: the dual-plane
+ /// channel index plus the secondary-plane weight. <c>null</c> means single-plane.
+ /// </summary>
+ private readonly record struct DualPlanePixel(int Channel, int Weight);
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/IBlockPipeline.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/IBlockPipeline.cs
new file mode 100644
index 00000000..824c4d98
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/IBlockPipeline.cs
@@ -0,0 +1,94 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// Pipeline strategy for the shared image-decode loop. Each
+/// ASTC decode profile (spec §C.2.5 — LDR or HDR mode) provides a concrete implementation.
+/// </summary>
+/// <typeparam name="T">Pixel element type — <see cref="byte"/> for LDR, <see cref="float"/> for HDR.</typeparam>
+internal interface IBlockPipeline
+ where T : unmanaged
+{
+ /// <summary>
+ /// Returns <c>true</c> if <paramref name="info"/> is decodable under this profile. The LDR
+ /// pipeline returns <c>false</c> for HDR-mode blocks (spec §C.2.19, §C.2.25 — HDR endpoint
+ /// formats are reserved in the LDR profile and produce the error colour). The HDR
+ /// pipeline accepts every legal block.
+ /// </summary>
+ /// <param name="info">Decoded block info.</param>
+ /// <returns>True if the block can be decoded by this pipeline.</returns>
+ public bool IsBlockLegal(in BlockInfo info);
+
+ /// <summary>
+ /// Writes the spec-mandated error colour (ASTC spec §C.2.19, §C.2.24) into a
+ /// footprint-sized region of <paramref name="buffer"/> starting at offset 0. Magenta
+ /// (R=1, G=0, B=1, A=1) in both profiles.
+ /// </summary>
+ /// <param name="footprint">Block footprint.</param>
+ /// <param name="buffer">Scratch or image buffer; the first <c>footprint.PixelCount</c>
+ /// pixels are overwritten.</param>
+ public void WriteErrorColor(Footprint footprint, Span buffer);
+
+ /// <summary>
+ /// Writes the spec-mandated error colour into the image buffer at
+ /// (<paramref name="dstBaseX"/>, <paramref name="dstBaseY"/>) for a footprint-sized
+ /// region, clipped to <paramref name="copyWidth"/> × <paramref name="copyHeight"/>.
+ /// Used at edge blocks where the footprint extends beyond the image.
+ /// </summary>
+ /// <param name="footprint">Block footprint.</param>
+ /// <param name="dstBaseX">Destination x origin in pixels.</param>
+ /// <param name="dstBaseY">Destination y origin in pixels.</param>
+ /// <param name="copyWidth">Clipped block width in pixels.</param>
+ /// <param name="copyHeight">Clipped block height in pixels.</param>
+ /// <param name="imageWidth">Image width in pixels (row stride in pixels).</param>
+ /// <param name="imageBuffer">Destination image buffer.</param>
+ public void WriteErrorColorClipped(
+ Footprint footprint,
+ int dstBaseX,
+ int dstBaseY,
+ int copyWidth,
+ int copyHeight,
+ int imageWidth,
+ Span imageBuffer);
+
+ /// <summary>
+ /// Fused fast path writing straight to the image buffer at
+ /// (<paramref name="dstBaseX"/>, <paramref name="dstBaseY"/>).
+ /// Handles the common shape — single-partition, single-plane,
+ /// non-void-extent (spec §C.2.10–§C.2.20) — by fusing BISE
+ /// decode + unquantise + weight infill + pixel write.
+ /// </summary>
+ /// <param name="blockBits">Raw 128-bit ASTC block.</param>
+ /// <param name="info">Decoded block info.</param>
+ /// <param name="footprint">Block footprint.</param>
+ /// <param name="dstBaseX">Destination x origin in pixels.</param>
+ /// <param name="dstBaseY">Destination y origin in pixels.</param>
+ /// <param name="imageWidth">Image width in pixels (row stride in pixels).</param>
+ /// <param name="imageBuffer">Destination image buffer.</param>
+ public void FusedToImage(UInt128 blockBits, in BlockInfo info, Footprint footprint, int dstBaseX, int dstBaseY, int imageWidth, Span imageBuffer);
+
+ /// <summary>
+ /// Fused fast path writing to a per-block scratch buffer (used at
+ /// image edges that need cropping). Same decode shape as <see cref="FusedToImage"/>.
+ /// </summary>
+ /// <param name="blockBits">Raw 128-bit ASTC block.</param>
+ /// <param name="info">Decoded block info.</param>
+ /// <param name="footprint">Block footprint.</param>
+ /// <param name="decodedPixels">Scratch buffer sized for one full block.</param>
+ public void FusedToScratch(UInt128 blockBits, in BlockInfo info, Footprint footprint, Span decodedPixels);
+
+ /// <summary>
+ /// General pipeline writer for blocks the fused path cannot handle:
+ /// void-extent (spec §C.2.23), multi-partition (spec §C.2.21), and dual-plane (spec §C.2.20).
+ /// Implementations forward to the appropriate decode entry.
+ /// </summary>
+ /// <param name="blockBits">Raw 128-bit ASTC block.</param>
+ /// <param name="info">Decoded block info.</param>
+ /// <param name="footprint">Block footprint.</param>
+ /// <param name="decodedPixels">Scratch buffer sized for one full block.</param>
+ public void LogicalWrite(UInt128 blockBits, in BlockInfo info, Footprint footprint, Span decodedPixels);
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/IPixelWriter.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/IPixelWriter.cs
new file mode 100644
index 00000000..61b56d58
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/IPixelWriter.cs
@@ -0,0 +1,43 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// Per-pixel writer strategy for the general (logical-block) decode loop in <see cref="LogicalBlock"/>.
+/// </summary>
+/// <typeparam name="T">Pixel element type — <see cref="byte"/> for LDR (UNORM8 RGBA), <see cref="float"/> for HDR (float32 RGBA).</typeparam>
+internal interface IPixelWriter
+ where T : unmanaged
+{
+ /// <summary>
+ /// Writes one pixel at <c>buffer[offset..offset+4]</c> using <paramref name="weight"/>
+ /// for every channel.
+ /// </summary>
+ /// <param name="buffer">Destination pixel buffer.</param>
+ /// <param name="offset">Element offset of the pixel's first channel.</param>
+ /// <param name="endpoint">Per-partition endpoint pair for this texel.</param>
+ /// <param name="weight">Unquantised weight (0..64) for every channel.</param>
+ void WritePixel(Span buffer, int offset, in ColorEndpointPair endpoint, int weight);
+
+ /// <summary>
+ /// Writes one pixel where the channel identified by <paramref name="dualPlaneChannel"/>
+ /// uses <paramref name="dualPlaneWeight"/> instead of <paramref name="primaryWeight"/>
+ /// (ASTC spec §C.2.20).
+ /// </summary>
+ /// <param name="buffer">Destination pixel buffer.</param>
+ /// <param name="offset">Element offset of the pixel's first channel.</param>
+ /// <param name="endpoint">Per-partition endpoint pair for this texel.</param>
+ /// <param name="primaryWeight">Unquantised weight (0..64) for the three primary-plane channels.</param>
+ /// <param name="dualPlaneChannel">RGBA channel index (0..3) driven by the secondary plane.</param>
+ /// <param name="dualPlaneWeight">Unquantised weight (0..64) for the dual-plane channel at this texel.</param>
+ void WritePixelDualPlane(
+ Span buffer,
+ int offset,
+ in ColorEndpointPair endpoint,
+ int primaryWeight,
+ int dualPlaneChannel,
+ int dualPlaneWeight);
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LdrPipeline.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LdrPipeline.cs
new file mode 100644
index 00000000..459d2237
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LdrPipeline.cs
@@ -0,0 +1,71 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// <see cref="IBlockPipeline{T}"/> implementation for the LDR (byte RGBA) decode profile
+/// (ASTC spec §C.2.5 "LDR Mode"). HDR-mode blocks are reserved in the LDR profile per §C.2.25
+/// and produce the error colour (magenta) per §C.2.19, §C.2.24.
+/// </summary>
+internal readonly struct LdrPipeline : IBlockPipeline
+{
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool IsBlockLegal(in BlockInfo info) => !info.IsHdr;
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void WriteErrorColor(Footprint footprint, Span buffer)
+ => FillMagenta(buffer[..(footprint.PixelCount * BlockInfo.ChannelsPerPixel)]);
+
+ /// <inheritdoc/>
+ public void WriteErrorColorClipped(
+ Footprint footprint,
+ int dstBaseX,
+ int dstBaseY,
+ int copyWidth,
+ int copyHeight,
+ int imageWidth,
+ Span imageBuffer)
+ {
+ int rowElements = copyWidth * BlockInfo.ChannelsPerPixel;
+ for (int pixelY = 0; pixelY < copyHeight; pixelY++)
+ {
+ int dstOffset = (((dstBaseY + pixelY) * imageWidth) + dstBaseX) * BlockInfo.ChannelsPerPixel;
+ FillMagenta(imageBuffer.Slice(dstOffset, rowElements));
+ }
+ }
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void FusedToImage(UInt128 blockBits, in BlockInfo info, Footprint footprint, int dstBaseX, int dstBaseY, int imageWidth, Span imageBuffer)
+ => FusedLdrBlockDecoder.DecompressBlockFusedLdrToImage(blockBits, in info, footprint, dstBaseX, dstBaseY, imageWidth, imageBuffer);
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void FusedToScratch(UInt128 blockBits, in BlockInfo info, Footprint footprint, Span decodedPixels)
+ => FusedLdrBlockDecoder.DecompressBlockFusedLdr(blockBits, in info, footprint, decodedPixels);
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void LogicalWrite(UInt128 blockBits, in BlockInfo info, Footprint footprint, Span decodedPixels)
+ => LogicalBlock.DecodeToBytes(blockBits, in info, footprint, decodedPixels);
+
+ /// <summary>
+ /// Spec §C.2.19 error colour: opaque magenta (0xFF, 0x00, 0xFF, 0xFF) as UNORM8 RGBA.
+ /// </summary>
+ private static void FillMagenta(Span buffer)
+ {
+ for (int i = 0; i < buffer.Length; i += BlockInfo.ChannelsPerPixel)
+ {
+ buffer[i] = 0xFF;
+ buffer[i + 1] = 0x00;
+ buffer[i + 2] = 0xFF;
+ buffer[i + 3] = 0xFF;
+ }
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LdrPixelWriter.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LdrPixelWriter.cs
new file mode 100644
index 00000000..434def5f
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LdrPixelWriter.cs
@@ -0,0 +1,52 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// LDR <see cref="IPixelWriter{T}"/> — writes UNORM8 RGBA bytes via the single-pixel (scalar) helpers in <see cref="SimdHelpers"/>.
+/// </summary>
+internal readonly struct LdrPixelWriter : IPixelWriter
+{
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void WritePixel(Span buffer, int offset, in ColorEndpointPair endpoint, int weight)
+ => SimdHelpers.WriteSinglePixelLdr(
+ buffer,
+ offset,
+ endpoint.LdrLow.R,
+ endpoint.LdrLow.G,
+ endpoint.LdrLow.B,
+ endpoint.LdrLow.A,
+ endpoint.LdrHigh.R,
+ endpoint.LdrHigh.G,
+ endpoint.LdrHigh.B,
+ endpoint.LdrHigh.A,
+ weight);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void WritePixelDualPlane(
+ Span buffer,
+ int offset,
+ in ColorEndpointPair endpoint,
+ int primaryWeight,
+ int dualPlaneChannel,
+ int dualPlaneWeight)
+ => SimdHelpers.WriteSinglePixelLdrDualPlane(
+ buffer,
+ offset,
+ endpoint.LdrLow.R,
+ endpoint.LdrLow.G,
+ endpoint.LdrLow.B,
+ endpoint.LdrLow.A,
+ endpoint.LdrHigh.R,
+ endpoint.LdrHigh.G,
+ endpoint.LdrHigh.B,
+ endpoint.LdrHigh.A,
+ primaryWeight,
+ dualPlaneChannel,
+ dualPlaneWeight);
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LogicalBlock.cs b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LogicalBlock.cs
new file mode 100644
index 00000000..6bd50098
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/BlockDecoding/LogicalBlock.cs
@@ -0,0 +1,339 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.BlockDecoding;
+
+/// <summary>
+/// General-purpose ASTC block decoder for blocks the fused fast paths cannot handle —
+/// void-extent (spec §C.2.23), multi-partition (spec §C.2.21), and dual-plane (spec §C.2.20).
+/// </summary>
+internal static class LogicalBlock
+{
+ /// <summary>
+ /// Decodes a block to its UNORM8 RGBA pixels. HDR-endpoint blocks must not reach this
+ /// method: the LDR pipeline rejects HDR-mode blocks (<see cref="LdrPipeline.IsBlockLegal"/>) per
+ /// ASTC spec §C.2.19, so every partition's endpoint here is LDR.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void DecodeToBytes(UInt128 bits, in BlockInfo info, Footprint footprint, Span pixels)
+ {
+ if (!info.IsValid)
+ {
+ return; // invalid block: nothing is written to pixels
+ }
+
+ // Conditional stackalloc isn't legal inside an expression; split the dual-plane case
+ // into a separate frame so the secondary-plane buffer is only stackalloc'd when needed.
+ if (info.DualPlane.Enabled && !info.IsVoidExtent)
+ {
+ DecodeToBytesDualPlane(bits, in info, footprint, pixels);
+ return;
+ }
+
+ // Up to 12×12 = 144 ints (576 bytes) for the largest 2D footprint per spec §C.2.4.
+ Span weights = stackalloc int[footprint.PixelCount];
+ DecodedBlockState state = DecodeSinglePlane(bits, in info, footprint, weights);
+
+ WriteAllPixels(footprint, pixels, in state);
+ }
+
+ /// <summary>
+ /// Decodes a block to its float RGBA pixels. Accepts both LDR and HDR endpoint modes.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void DecodeToFloats(UInt128 bits, in BlockInfo info, Footprint footprint, Span pixels)
+ {
+ if (!info.IsValid)
+ {
+ return; // invalid block: nothing is written to pixels
+ }
+
+ if (info.DualPlane.Enabled && !info.IsVoidExtent)
+ {
+ DecodeToFloatsDualPlane(bits, in info, footprint, pixels); // separate frame holds the second weight plane
+ return;
+ }
+
+ // Up to 12×12 = 144 ints (576 bytes) for the largest 2D footprint per spec §C.2.4.
+ Span weights = stackalloc int[footprint.PixelCount];
+ DecodedBlockState state = DecodeSinglePlane(bits, in info, footprint, weights);
+
+ WriteAllPixels(footprint, pixels, in state);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)] // kept out of line so the second stackalloc lives in its own frame
+ private static void DecodeToBytesDualPlane(UInt128 bits, in BlockInfo info, Footprint footprint, Span pixels)
+ {
+ // Two weight planes for dual-plane blocks (spec §C.2.20). Up to 2 × 144 = 288 ints
+ // (1152 bytes) at the largest 12×12 footprint.
+ Span weights = stackalloc int[footprint.PixelCount];
+ Span secondaryWeights = stackalloc int[footprint.PixelCount];
+ DecodedBlockState state = DecodeDualPlane(bits, in info, footprint, weights, secondaryWeights);
+ DualPlane dualPlane = new() { Weights = secondaryWeights, Channel = info.DualPlane.Channel };
+
+ WriteAllPixelsDualPlane(footprint, pixels, in state, in dualPlane);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)] // kept out of line so the second stackalloc lives in its own frame
+ private static void DecodeToFloatsDualPlane(UInt128 bits, in BlockInfo info, Footprint footprint, Span pixels)
+ {
+ // Two weight planes for dual-plane blocks (spec §C.2.20). Up to 2 × 144 = 288 ints
+ // (1152 bytes) at the largest 12×12 footprint.
+ Span weights = stackalloc int[footprint.PixelCount];
+ Span secondaryWeights = stackalloc int[footprint.PixelCount];
+ DecodedBlockState state = DecodeDualPlane(bits, in info, footprint, weights, secondaryWeights);
+ DualPlane dualPlane = new() { Weights = secondaryWeights, Channel = info.DualPlane.Channel };
+
+ WriteAllPixelsDualPlane(footprint, pixels, in state, in dualPlane);
+ }
+
+ /// <summary>
+ /// Builds the <see cref="DecodedBlockState"/> for a single-plane or void-extent block.
+ /// </summary>
+ private static DecodedBlockState DecodeSinglePlane(
+ UInt128 bits,
+ in BlockInfo info,
+ Footprint footprint,
+ Span weights)
+ {
+ DecodedBlockState state = default;
+ state.Weights = weights;
+
+ if (info.IsVoidExtent)
+ {
+ state.Endpoints[0] = DecodeVoidExtentEndpoint(bits, info.IsHdr);
+ weights.Clear(); // void-extent: low and high endpoints are the same colour, so all weights are zeroed
+ state.PartitionAssignment = Partition.GetSinglePartition(footprint).Assignment;
+ return state;
+ }
+
+ DecodeEndpointsFromBits(bits, in info, ref state.Endpoints);
+ DecodeAndInfillWeights(bits, in info, footprint, weights, default); // default: no secondary plane
+ state.PartitionAssignment = ResolvePartitionAssignment(bits, info.PartitionCount, footprint);
+ return state;
+ }
+
+ /// <summary>
+ /// Builds the <see cref="DecodedBlockState"/> for a dual-plane block (spec §C.2.20),
+ /// filling <paramref name="secondaryWeights"/> with the second plane's per-texel weights.
+ /// </summary>
+ private static DecodedBlockState DecodeDualPlane(
+ UInt128 bits,
+ in BlockInfo info,
+ Footprint footprint,
+ Span weights,
+ Span secondaryWeights)
+ {
+ DecodedBlockState state = default;
+ state.Weights = weights;
+ DecodeEndpointsFromBits(bits, in info, ref state.Endpoints);
+ DecodeAndInfillWeights(bits, in info, footprint, weights, secondaryWeights);
+ state.PartitionAssignment = ResolvePartitionAssignment(bits, info.PartitionCount, footprint);
+ return state;
+ }
+
+ /// <summary>
+ /// BISE-decodes (spec §C.2.12) + unquantises (spec §C.2.13) the per-partition color
+ /// endpoint values into <paramref name="endpoints"/> (one entry per partition, colour
+ /// value count per mode from §C.2.14).
+ /// </summary>
+ private static void DecodeEndpointsFromBits(UInt128 bits, in BlockInfo info, ref EndpointBuffer endpoints)
+ {
+ // Up to 18 ints (72 bytes) — BlockModeDecoder rejects blocks with Colors.Count > 18.
+ Span colors = stackalloc int[info.Colors.Count];
+ FusedBlockDecoder.DecodeBiseValues(
+ bits,
+ info.Colors.StartBit,
+ info.Colors.BitCount,
+ info.Colors.Range,
+ info.Colors.Count,
+ colors);
+ Quantization.UnquantizeCEValuesBatch(colors, info.Colors.Range);
+
+ int colorIndex = 0; // running offset of the current partition's values within colors
+ for (int i = 0; i < info.PartitionCount; i++)
+ {
+ ColorEndpointMode mode = info.GetEndpointMode(i);
+ int colorCount = mode.GetColorValuesCount();
+ ReadOnlySpan slice = colors.Slice(colorIndex, colorCount);
+ endpoints[i] = EndpointCodec.Decode(slice, mode);
+ colorIndex += colorCount;
+ }
+ }
+
+ /// <summary>
+ /// Returns the cached partition-assignment map. Multi-partition blocks use the 10-bit
+ /// partition id from bits [13..22] (spec §C.2.10) and the partition hash function
+ /// (spec §C.2.21); single-partition blocks share an all-zero map per footprint.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static ReadOnlySpan ResolvePartitionAssignment(UInt128 bits, int partitionCount, Footprint footprint)
+ => partitionCount > 1
+ ? Partition.GetASTCPartition(
+ footprint,
+ partitionCount,
+ (int)BitOperations.GetBits(bits.Low(), 13, 10)).Assignment
+ : Partition.GetSinglePartition(footprint).Assignment;
+
+ /// <summary>
+ /// BISE-decodes (spec §C.2.12), unquantises (spec §C.2.17), and infills the weight grid
+ /// (spec §C.2.18) into <paramref name="primaryWeights"/>. For dual-plane blocks
+ /// (spec §C.2.20) the secondary plane is decoded into <paramref name="secondaryWeights"/>;
+ /// otherwise <paramref name="secondaryWeights"/> is ignored.
+ /// </summary>
+ private static void DecodeAndInfillWeights(
+ UInt128 bits,
+ in BlockInfo info,
+ Footprint footprint,
+ Span primaryWeights,
+ Span secondaryWeights)
+ {
+ int gridSize = info.Weights.Width * info.Weights.Height;
+ bool isDualPlane = info.DualPlane.Enabled;
+ int totalWeights = isDualPlane ? gridSize * 2 : gridSize;
+
+ // Up to 128 ints (512 bytes) — spec §C.2.11 caps total weights (gridSize × planes) at 64
+ // for single-plane and 128 (i.e. 64 × 2) for dual-plane.
+ Span rawWeights = stackalloc int[totalWeights];
+ FusedBlockDecoder.DecodeBiseWeights(
+ bits,
+ info.Weights.BitCount,
+ info.Weights.Range,
+ totalWeights,
+ rawWeights);
+
+ DecimationInfo decimationInfo = DecimationTable.Get(footprint, info.Weights.Width, info.Weights.Height);
+
+ if (!isDualPlane)
+ {
+ Quantization.UnquantizeWeightsBatch(rawWeights, info.Weights.Range);
+ DecimationTable.InfillWeights(rawWeights[..gridSize], decimationInfo, primaryWeights);
+ return;
+ }
+
+ // Spec §C.2.20: the two planes' weights are interleaved — even indices drive the
+ // main plane, odd the secondary plane. Each plane has up to 64 ints (256 bytes); spec
+ // §C.2.11 caps gridSize × 2 ≤ 128, so gridSize ≤ 64 for dual-plane.
+ Span plane0 = stackalloc int[gridSize];
+ Span plane1 = stackalloc int[gridSize];
+ for (int i = 0; i < gridSize; i++)
+ {
+ plane0[i] = rawWeights[i * 2];
+ plane1[i] = rawWeights[(i * 2) + 1];
+ }
+
+ Quantization.UnquantizeWeightsBatch(plane0, info.Weights.Range);
+ Quantization.UnquantizeWeightsBatch(plane1, info.Weights.Range);
+
+ DecimationTable.InfillWeights(plane0, decimationInfo, primaryWeights);
+ DecimationTable.InfillWeights(plane1, decimationInfo, secondaryWeights);
+ }
+
+ /// <summary>
+ /// Reads the four 16-bit RGBA channels from the high half of a void-extent block
+ /// (ASTC spec §C.2.23) and wraps them in a <see cref="ColorEndpointPair"/>. LDR void-extent
+ /// channels are UNORM16 (reduced to byte range for the LDR output path); HDR channels are
+ /// FP16 bit patterns.
+ /// </summary>
+ private static ColorEndpointPair DecodeVoidExtentEndpoint(UInt128 bits, bool isHdr)
+ {
+ ulong high = bits.High();
+ ushort r = (ushort)(high & 0xFFFF);
+ ushort g = (ushort)((high >> 16) & 0xFFFF);
+ ushort b = (ushort)((high >> 32) & 0xFFFF);
+ ushort a = (ushort)((high >> 48) & 0xFFFF);
+
+ if (isHdr)
+ {
+ Rgba64 hdrColor = new(r, g, b, a);
+ return ColorEndpointPair.Hdr(hdrColor, hdrColor, valuesAreLns: false);
+ }
+
+ Rgba32 ldrColor = new((byte)(r >> 8), (byte)(g >> 8), (byte)(b >> 8), (byte)(a >> 8)); // keep the top 8 bits of each 16-bit channel
+ return ColorEndpointPair.Ldr(ldrColor, ldrColor);
+ }
+
+ /// <summary>
+ /// Generic single-plane pixel-write loop. Each iteration looks up the partition's
+ /// endpoint and dispatches to <typeparamref name="TWriter"/> for the actual write.
+ /// Constraining <typeparamref name="TWriter"/> to a struct allows the JIT to specialise
+ /// and inline the per-pixel call.
+ /// </summary>
+ private static void WriteAllPixels(Footprint footprint, Span buffer, in DecodedBlockState state)
+ where TWriter : struct, IPixelWriter
+ where T : unmanaged
+ {
+ TWriter writer = default;
+ int pixelCount = footprint.PixelCount;
+ for (int i = 0; i < pixelCount; i++)
+ {
+ ref readonly ColorEndpointPair endpoint = ref state.Endpoints[state.PartitionAssignment[i]];
+ writer.WritePixel(buffer, i * BlockInfo.ChannelsPerPixel, in endpoint, state.Weights[i]);
+ }
+ }
+
+ /// <summary>
+ /// Generic dual-plane pixel-write loop (ASTC spec §C.2.20). Same shape as
+ /// <see cref="WriteAllPixels{TWriter, T}"/> but the channel named by <see cref="DualPlane.Channel"/>
+ /// uses the secondary plane's per-texel weight.
+ /// </summary>
+ private static void WriteAllPixelsDualPlane(
+ Footprint footprint,
+ Span buffer,
+ in DecodedBlockState state,
+ in DualPlane dualPlane)
+ where TWriter : struct, IPixelWriter
+ where T : unmanaged
+ {
+ TWriter writer = default;
+ int dpChannel = dualPlane.Channel;
+ int pixelCount = footprint.PixelCount;
+ for (int i = 0; i < pixelCount; i++)
+ {
+ ref readonly ColorEndpointPair endpoint = ref state.Endpoints[state.PartitionAssignment[i]];
+ writer.WritePixelDualPlane(buffer, i * BlockInfo.ChannelsPerPixel, in endpoint, state.Weights[i], dpChannel, dualPlane.Weights[i]);
+ }
+ }
+
+ /// <summary>
+ /// Inline storage for up to 4 per-partition <see cref="ColorEndpointPair"/> values
+ /// (spec §C.2.10 caps partition count at 4). Used as a stack-local buffer to hold the
+ /// decoded endpoints during a single <see cref="DecodeToBytes"/> / <see cref="DecodeToFloats"/> call.
+ /// </summary>
+ [InlineArray(4)]
+ private struct EndpointBuffer
+ {
+#pragma warning disable CS0169, IDE0051, S1144 // Accessed by runtime via [InlineArray]
+ private ColorEndpointPair element0;
+#pragma warning restore CS0169, IDE0051, S1144
+ }
+
+ /// <summary>
+ /// State common to single-plane and dual-plane blocks: per-partition endpoints, primary
+ /// per-texel weights, and the partition-assignment map. Stack-only — holds a stack-local
+ /// <see cref="EndpointBuffer"/> and a <see cref="Span{T}"/>.
+ /// </summary>
+ private ref struct DecodedBlockState
+ {
+ public EndpointBuffer Endpoints;
+ public Span Weights;
+ public ReadOnlySpan PartitionAssignment;
+ }
+
+ /// <summary>
+ /// Secondary weight plane for dual-plane blocks (ASTC spec §C.2.20). The channel
+ /// identified by <see cref="Channel"/> uses these per-texel weights instead of the
+ /// primary plane's.
+ /// </summary>
+ private ref struct DualPlane
+ {
+ public Span Weights;
+ public int Channel;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointMode.cs b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointMode.cs
new file mode 100644
index 00000000..d14e50dd
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointMode.cs
@@ -0,0 +1,38 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+/// <summary>
+/// ASTC supports 16 color endpoint encoding schemes, known as endpoint modes.
+/// </summary>
+/// <remarks>
+/// The options for endpoint modes let you vary the following:
+/// <list type="bullet">
+/// <item><description>The number of color channels. For example, luminance, luminance+alpha, rgb, or rgba</description></item>
+/// <item><description>The encoding method. For example, direct, base+offset, base+scale, or quantization level</description></item>
+/// <item><description>The data range. For example, low dynamic range or High Dynamic Range</description></item>
+/// </list>
+/// </remarks>
+internal enum ColorEndpointMode
+{
+ LdrLumaDirect = 0, // Mode 0
+ LdrLumaBaseOffset, // Mode 1
+ HdrLumaLargeRange, // Mode 2
+ HdrLumaSmallRange, // Mode 3
+ LdrLumaAlphaDirect, // Mode 4
+ LdrLumaAlphaBaseOffset, // Mode 5
+ LdrRgbBaseScale, // Mode 6
+ HdrRgbBaseScale, // Mode 7
+ LdrRgbDirect, // Mode 8
+ LdrRgbBaseOffset, // Mode 9
+ LdrRgbBaseScaleTwoA, // Mode 10
+ HdrRgbDirect, // Mode 11
+ LdrRgbaDirect, // Mode 12
+ LdrRgbaBaseOffset, // Mode 13
+ HdrRgbDirectLdrAlpha, // Mode 14
+ HdrRgbDirectHdrAlpha, // Mode 15
+
+ // Number of endpoint modes defined by the ASTC specification (evaluates to 16); a count, not a mode.
+ ColorEndpointModeCount
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointModeExtensions.cs b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointModeExtensions.cs
new file mode 100644
index 00000000..7ceccf86
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointModeExtensions.cs
@@ -0,0 +1,31 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+internal static class ColorEndpointModeExtensions
+{
+ public static int GetEndpointModeClass(this ColorEndpointMode mode) // Class index of a mode: modes 0-3 → 0, 4-7 → 1, 8-11 → 2, 12-15 → 3.
+ => (int)mode / 4;
+
+ public static int GetColorValuesCount(this ColorEndpointMode mode) // Number of color values the mode consumes: 2, 4, 6 or 8 for class 0..3.
+ => (mode.GetEndpointModeClass() + 1) * 2;
+
+ /// <summary>
+ /// Determines whether the specified endpoint mode uses HDR (High Dynamic Range) encoding.
+ /// </summary>
+ /// <returns>
+ /// True if the mode is one of the 6 HDR modes (2, 3, 7, 11, 14, 15), false otherwise.
+ /// </returns>
+ public static bool IsHdr(this ColorEndpointMode mode)
+ => mode switch
+ {
+ ColorEndpointMode.HdrLumaLargeRange => true, // Mode 2
+ ColorEndpointMode.HdrLumaSmallRange => true, // Mode 3
+ ColorEndpointMode.HdrRgbBaseScale => true, // Mode 7
+ ColorEndpointMode.HdrRgbDirect => true, // Mode 11
+ ColorEndpointMode.HdrRgbDirectLdrAlpha => true, // Mode 14
+ ColorEndpointMode.HdrRgbDirectHdrAlpha => true, // Mode 15
+ _ => false // Every LDR mode, and any value outside the named modes.
+ };
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointPair.cs b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointPair.cs
new file mode 100644
index 00000000..b4c11fd5
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/ColorEndpointPair.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+/// <summary>
+/// A value-type discriminated union representing either an LDR or HDR color endpoint pair. IsHdr selects which field group is meaningful; both groups are separate fields (no overlapping storage).
+/// </summary>
+[StructLayout(LayoutKind.Auto)]
+internal struct ColorEndpointPair
+{
+ public bool IsHdr; // Discriminator: false → read the Ldr* fields, true → read the Hdr* fields.
+
+ // LDR fields (used when IsHdr == false)
+ public Rgba32 LdrLow;
+ public Rgba32 LdrHigh;
+
+ // HDR fields (used when IsHdr == true)
+ public Rgba64 HdrLow;
+ public Rgba64 HdrHigh;
+ public bool AlphaIsLdr; // Only assigned by Hdr(); stays false for pairs built with Ldr().
+ public bool ValuesAreLns; // Only assigned by Hdr(), where it defaults to true.
+
+ public static ColorEndpointPair Ldr(Rgba32 low, Rgba32 high) // Builds an LDR pair; all HDR fields keep their default values.
+ => new() { IsHdr = false, LdrLow = low, LdrHigh = high };
+
+ public static ColorEndpointPair Hdr(Rgba64 low, Rgba64 high, bool alphaIsLdr = false, bool valuesAreLns = true) // Builds an HDR pair; the LDR fields keep their default values.
+ => new() { IsHdr = true, HdrLow = low, HdrHigh = high, AlphaIsLdr = alphaIsLdr, ValuesAreLns = valuesAreLns };
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/EndpointCodec.cs b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/EndpointCodec.cs
new file mode 100644
index 00000000..0ac1aa36
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/EndpointCodec.cs
@@ -0,0 +1,174 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Textures.Compression.Astc.BiseEncoding.Quantize;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+using static SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding.Rgba32Extensions;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+internal static class EndpointCodec
+{
+ /// <summary>
+ /// Decodes color endpoints for the specified mode from already-unquantized values.
+ /// Handles both LDR and HDR endpoint modes (ASTC spec §C.2.14).
+ /// </summary>
+ /// <remarks>
+ /// Quantized input must be unquantized by the caller first; no unquantization is performed here.
+ /// </remarks>
+ public static ColorEndpointPair Decode(ReadOnlySpan unquantizedValues, ColorEndpointMode mode)
+ {
+ if (mode.IsHdr())
+ {
+ (Rgba64 hdrLow, Rgba64 hdrHigh) = HdrEndpointDecoder.DecodeHdrModeUnquantized(unquantizedValues, mode);
+ bool alphaIsLdr = mode == ColorEndpointMode.HdrRgbDirectLdrAlpha; // Only mode 14 combines HDR RGB with an LDR alpha.
+ return ColorEndpointPair.Hdr(hdrLow, hdrHigh, alphaIsLdr);
+ }
+
+ (Rgba32 low, Rgba32 high) = mode switch
+ {
+ ColorEndpointMode.LdrLumaDirect => DecodeLumaDirect(unquantizedValues),
+ ColorEndpointMode.LdrLumaBaseOffset => DecodeLumaBaseOffset(unquantizedValues),
+ ColorEndpointMode.LdrLumaAlphaDirect => DecodeLumaAlphaDirect(unquantizedValues),
+ ColorEndpointMode.LdrLumaAlphaBaseOffset => DecodeLumaAlphaBaseOffset(unquantizedValues),
+ ColorEndpointMode.LdrRgbBaseScale => DecodeRgbBaseScale(unquantizedValues),
+ ColorEndpointMode.LdrRgbDirect => DecodeRgbDirect(unquantizedValues),
+ ColorEndpointMode.LdrRgbBaseOffset => DecodeRgbBaseOffset(unquantizedValues),
+ ColorEndpointMode.LdrRgbBaseScaleTwoA => DecodeRgbBaseScaleTwoAlpha(unquantizedValues),
+ ColorEndpointMode.LdrRgbaDirect => DecodeRgbaDirect(unquantizedValues),
+ ColorEndpointMode.LdrRgbaBaseOffset => DecodeRgbaBaseOffset(unquantizedValues),
+ _ => throw new ArgumentOutOfRangeException(nameof(mode), mode, "Unknown endpoint mode"), // Reached for values that are neither HDR nor a listed LDR mode (e.g. ColorEndpointModeCount).
+ };
+
+ return ColorEndpointPair.Ldr(low, high);
+ }
+
+ // Each decoder below implements one LDR endpoint mode per ASTC spec §C.2.14
+ // (Color Endpoint Decoding). Inputs are the unquantized color values for that mode.
+
+ // Mode 0 (§C.2.14 "LDR luminance, direct"): v0 is the low luma and v1 the high luma, each copied to R, G and B; alpha keeps ClampedRgba32's default of 255.
+ private static (Rgba32 Low, Rgba32 High) DecodeLumaDirect(ReadOnlySpan v)
+ => (ClampedRgba32(v[0], v[0], v[0]),
+ ClampedRgba32(v[1], v[1], v[1]));
+
+ // Mode 1 (§C.2.14 "LDR luminance, base+offset"): v0 plus the top bits of v1 form the low
+ // luma; the bottom six bits of v1 are a saturated offset added to form the high luma.
+ private static (Rgba32 Low, Rgba32 High) DecodeLumaBaseOffset(ReadOnlySpan v)
+ {
+ int l0 = (v[0] >> 2) | (v[1] & 0xC0); // Top six bits of v0 land in bits 0-5; the top two bits of v1 stay in bits 6-7.
+ int l1 = Math.Min(l0 + (v[1] & 0x3F), 0xFF); // Saturates at 255 instead of wrapping.
+ return (ClampedRgba32(l0, l0, l0),
+ ClampedRgba32(l1, l1, l1));
+ }
+
+ // Mode 4 (§C.2.14 "LDR luminance+alpha, direct"): v0,v1 → low/high luma; v2,v3 → low/high alpha.
+ private static (Rgba32 Low, Rgba32 High) DecodeLumaAlphaDirect(ReadOnlySpan v)
+ => (ClampedRgba32(v[0], v[0], v[0], v[2]),
+ ClampedRgba32(v[1], v[1], v[1], v[3]));
+
+ // Mode 5 (§C.2.14 "LDR luminance+alpha, base+offset"): TransferPrecision unpacks each
+ // (high,low) pair into a signed offset b and a base a.
+ private static (Rgba32 Low, Rgba32 High) DecodeLumaAlphaBaseOffset(ReadOnlySpan v)
+ {
+ (int bL, int aL) = BitOperations.TransferPrecision(v[1], v[0]); // Luma: offset bL, base aL.
+ (int bA, int aA) = BitOperations.TransferPrecision(v[3], v[2]); // Alpha: offset bA, base aA.
+ int highLuma = aL + bL; // May fall outside [0, 255]; ClampedRgba32 clamps it below.
+ return (ClampedRgba32(aL, aL, aL, aA),
+ ClampedRgba32(highLuma, highLuma, highLuma, aA + bA));
+ }
+
+ // Mode 6 (§C.2.14 "LDR RGB, base+scale"): high = (v0,v1,v2); low = high * v3 >> 8.
+ private static (Rgba32 Low, Rgba32 High) DecodeRgbBaseScale(ReadOnlySpan v)
+ {
+ Rgba32 low = ClampedRgba32((v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); // v3 acts as a scale factor of v3/256 on every channel.
+ Rgba32 high = ClampedRgba32(v[0], v[1], v[2]); // Alpha is left at the default 255 for both endpoints.
+ return (low, high);
+ }
+
+ // Mode 8 (§C.2.14 "LDR RGB, direct"): if the high triple is dimmer than the low triple
+ // the endpoints are swapped and the R/G channels are averaged against the B channel
+ // ("blue contract" per §C.2.14).
+ private static (Rgba32 Low, Rgba32 High) DecodeRgbDirect(ReadOnlySpan v)
+ {
+ int sumLow = v[0] + v[2] + v[4]; // Even indices carry the nominal low triple (R, G, B).
+ int sumHigh = v[1] + v[3] + v[5]; // Odd indices carry the nominal high triple (R, G, B).
+
+ if (sumHigh < sumLow)
+ {
+ return (ClampedRgba32((v[1] + v[5]) >> 1, (v[3] + v[5]) >> 1, v[5]), // Swapped: the odd-indexed triple becomes Low, blue-contracted.
+ ClampedRgba32((v[0] + v[4]) >> 1, (v[2] + v[4]) >> 1, v[4])); // ...and the even-indexed triple becomes High.
+ }
+
+ return (ClampedRgba32(v[0], v[2], v[4]),
+ ClampedRgba32(v[1], v[3], v[5]));
+ }
+
+ // Mode 9 (§C.2.14 "LDR RGB, base+offset"): per-channel (base, offset). When the sum of
+ // offsets is negative the blue-contract branch applies, otherwise low = base and
+ // high = base + offset.
+ private static (Rgba32 Low, Rgba32 High) DecodeRgbBaseOffset(ReadOnlySpan v)
+ {
+ (int bR, int aR) = BitOperations.TransferPrecision(v[1], v[0]); // b* = signed offset, a* = base.
+ (int bG, int aG) = BitOperations.TransferPrecision(v[3], v[2]);
+ (int bB, int aB) = BitOperations.TransferPrecision(v[5], v[4]);
+
+ if (bR + bG + bB < 0)
+ {
+ return (ClampedRgba32((aR + bR + aB + bB) >> 1, (aG + bG + aB + bB) >> 1, aB + bB), // Low = blue-contracted (base + offset); clamping happens after the averaging.
+ ClampedRgba32((aR + aB) >> 1, (aG + aB) >> 1, aB)); // High = blue-contracted base.
+ }
+
+ return (ClampedRgba32(aR, aG, aB),
+ ClampedRgba32(aR + bR, aG + bG, aB + bB));
+ }
+
+ // Mode 10 (§C.2.14 "LDR RGB, base+scale plus two alpha values"): same RGB scaling as
+ // mode 6, but v4 and v5 carry independent low/high alpha values.
+ private static (Rgba32 Low, Rgba32 High) DecodeRgbBaseScaleTwoAlpha(ReadOnlySpan v)
+ {
+ Rgba32 low = ClampedRgba32(
+ r: (v[0] * v[3]) >> 8, // Each low channel is the high channel scaled by v3/256.
+ g: (v[1] * v[3]) >> 8,
+ b: (v[2] * v[3]) >> 8,
+ a: v[4]); // Low alpha.
+ Rgba32 high = ClampedRgba32(v[0], v[1], v[2], v[5]); // Unscaled RGB with the high alpha.
+ return (low, high);
+ }
+
+ // Mode 12 (§C.2.14 "LDR RGBA, direct"): like RGB-direct plus alpha. When the high
+ // triple is dimmer the endpoints are swapped (RGB via blue-contract, alpha by
+ // index-swap).
+ private static (Rgba32 Low, Rgba32 High) DecodeRgbaDirect(ReadOnlySpan v)
+ {
+ int sumLow = v[0] + v[2] + v[4]; // Alpha (v6, v7) does not take part in the brightness comparison.
+ int sumHigh = v[1] + v[3] + v[5];
+
+ if (sumHigh >= sumLow)
+ {
+ return (ClampedRgba32(v[0], v[2], v[4], v[6]), // No swap: even indices → Low, odd indices → High.
+ ClampedRgba32(v[1], v[3], v[5], v[7]));
+ }
+
+ return (ClampedRgba32((v[1] + v[5]) >> 1, (v[3] + v[5]) >> 1, v[5], v[7]), // Swapped: odd-indexed values (including alpha v7) become Low.
+ ClampedRgba32((v[0] + v[4]) >> 1, (v[2] + v[4]) >> 1, v[4], v[6]));
+ }
+
+ // Mode 13 (§C.2.14 "LDR RGBA, base+offset"): mode 9 extended with alpha. Only the RGB offsets decide whether the swap branch is taken.
+ private static (Rgba32 Low, Rgba32 High) DecodeRgbaBaseOffset(ReadOnlySpan v)
+ {
+ (int bR, int aR) = BitOperations.TransferPrecision(v[1], v[0]); // b* = signed offset, a* = base.
+ (int bG, int aG) = BitOperations.TransferPrecision(v[3], v[2]);
+ (int bB, int aB) = BitOperations.TransferPrecision(v[5], v[4]);
+ (int bA, int aA) = BitOperations.TransferPrecision(v[7], v[6]);
+
+ if (bR + bG + bB < 0)
+ {
+ return (ClampedRgba32((aR + bR + aB + bB) >> 1, (aG + bG + aB + bB) >> 1, aB + bB, aA + bA), // Low = blue-contracted (base + offset), alpha = base + offset.
+ ClampedRgba32((aR + aB) >> 1, (aG + aB) >> 1, aB, aA)); // High = blue-contracted base, alpha = base.
+ }
+
+ return (ClampedRgba32(aR, aG, aB, aA),
+ ClampedRgba32(aR + bR, aG + bG, aB + bB, aA + bA));
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/HdrEndpointDecoder.cs b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/HdrEndpointDecoder.cs
new file mode 100644
index 00000000..6518d68c
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/HdrEndpointDecoder.cs
@@ -0,0 +1,458 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+/// <summary>
+/// Decodes HDR (High Dynamic Range) color endpoints for ASTC texture compression.
+/// </summary>
+/// <remarks>
+/// HDR modes produce 12-bit intermediate values (0-4095) which are shifted left by 4
+/// to produce the final 16-bit values (0-65520) stored as FP16 bit patterns.
+/// </remarks>
+internal static class HdrEndpointDecoder
+{
+ /// <summary>
+ /// Output slot index for the HdrRgbBaseScale (CEM 7) bit-placement table.
+ /// </summary>
+ private enum BaseScaleTarget
+ {
+ Red, // Slot 0
+ Green, // Slot 1
+ Blue, // Slot 2
+ Scale, // Slot 3
+ }
+
+ /// <summary>
+ /// Output slot index for the HdrRgbDirect (CEM 11) bit-placement table.
+ /// </summary>
+ private enum DirectTarget
+ {
+ A, // Slot 0
+ B0, // Slot 1
+ B1, // Slot 2
+ C, // Slot 3
+ D0, // Slot 4
+ D1, // Slot 5
+ }
+
+ /// <summary>
+ /// One row of an HDR bit-placement table. When the current one-hot mode shares a set bit with
+ /// <c>ModeMask</c>, the bit at source index <c>SourceBit</c> is OR'd into
+ /// the output slot at index <c>Slot</c>, shifted left by <c>TargetShift</c>.
+ /// The slot is stored as a plain int so the same row type serves both placement
+ /// tables; callers populate it from <see cref="BaseScaleTarget"/> or <see cref="DirectTarget"/>.
+ /// </summary>
+ private readonly record struct BitPlacement(int Slot, int ModeMask, int SourceBit, int TargetShift);
+
+ // Left-shift amounts for the HdrRgbBaseScale mode, indexed by the decoded sub-mode (0..5); applied to red, green, blue and scale alike.
+ // See ARM astcenc_color_unquantize.cpp rgb_hdr_unpack.
+#pragma warning disable SA1201 // Readability: keep tables adjacent to the types they use.
+ private static readonly int[] BaseScaleShiftByMode = [1, 1, 2, 3, 4, 5];
+
+ // Bit placements for the HdrRgbBaseScale mode (ASTC CEM 7). Each entry represents:
+ // "if the current one-hot mode matches ModeMask, OR sourceBits[SourceBit] into Slot at
+ // position TargetShift." The table reproduces the if-statement ladder from the ARM
+ // reference while making the per-mode pattern directly inspectable.
+ private static readonly BitPlacement[] BaseScalePlacements =
+ [
+ new(Slot: (int)BaseScaleTarget.Green, ModeMask: 0x30, SourceBit: 0, TargetShift: 6), // modes 4, 5
+ new(Slot: (int)BaseScaleTarget.Green, ModeMask: 0x3A, SourceBit: 1, TargetShift: 5), // modes 1, 3, 4, 5
+ new(Slot: (int)BaseScaleTarget.Blue, ModeMask: 0x30, SourceBit: 2, TargetShift: 6), // modes 4, 5
+ new(Slot: (int)BaseScaleTarget.Blue, ModeMask: 0x3A, SourceBit: 3, TargetShift: 5), // modes 1, 3, 4, 5
+ new(Slot: (int)BaseScaleTarget.Scale, ModeMask: 0x3D, SourceBit: 6, TargetShift: 5), // modes 0, 2, 3, 4, 5
+ new(Slot: (int)BaseScaleTarget.Scale, ModeMask: 0x2D, SourceBit: 5, TargetShift: 6), // modes 0, 2, 3, 5
+ new(Slot: (int)BaseScaleTarget.Scale, ModeMask: 0x04, SourceBit: 4, TargetShift: 7), // mode 2
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x3B, SourceBit: 4, TargetShift: 6), // modes 0, 1, 3, 4, 5
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x04, SourceBit: 3, TargetShift: 6), // mode 2
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x10, SourceBit: 5, TargetShift: 7), // mode 4
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x0F, SourceBit: 2, TargetShift: 7), // modes 0, 1, 2, 3
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x05, SourceBit: 1, TargetShift: 8), // modes 0, 2
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x0A, SourceBit: 0, TargetShift: 8), // modes 1, 3
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x05, SourceBit: 0, TargetShift: 9), // modes 0, 2
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x02, SourceBit: 6, TargetShift: 9), // mode 1
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x01, SourceBit: 3, TargetShift: 10), // mode 0
+ new(Slot: (int)BaseScaleTarget.Red, ModeMask: 0x02, SourceBit: 5, TargetShift: 10), // mode 1
+ ];
+
+ // Data-bit widths for the HdrRgbDirect mode (ASTC CEM 11), indexed by modeValue (0..7).
+ // Gives the width in bits of the d0/d1 offsets, used to sign-extend them. From ARM reference.
+ private static readonly int[] DirectDataBitsByMode = [7, 6, 7, 6, 5, 6, 5, 6];
+
+ // Bit placements for the HdrRgbDirect mode (ASTC CEM 11). Each entry: if the current
+ // one-hot modeValue matches ModeMask, OR sourceBits[SourceBit] into Slot at TargetShift.
+ // Entries are grouped by Slot (A, C, B0/B1, D0/D1 — see the ARM reference).
+ // Pairs like (B0, B1) or (D0, D1) share a single ModeMask in the ARM reference but
+ // consume different source bits per slot, so they appear as two entries here.
+ private static readonly BitPlacement[] DirectPlacements =
+ [
+ new(Slot: (int)DirectTarget.A, ModeMask: 0xA4, SourceBit: 0, TargetShift: 9), // modes 2, 5, 7
+ new(Slot: (int)DirectTarget.A, ModeMask: 0x08, SourceBit: 2, TargetShift: 9), // mode 3
+ new(Slot: (int)DirectTarget.A, ModeMask: 0x50, SourceBit: 4, TargetShift: 9), // modes 4, 6
+ new(Slot: (int)DirectTarget.A, ModeMask: 0x50, SourceBit: 5, TargetShift: 10), // modes 4, 6
+ new(Slot: (int)DirectTarget.A, ModeMask: 0xA0, SourceBit: 1, TargetShift: 10), // modes 5, 7
+ new(Slot: (int)DirectTarget.A, ModeMask: 0xC0, SourceBit: 2, TargetShift: 11), // modes 6, 7
+ new(Slot: (int)DirectTarget.C, ModeMask: 0x04, SourceBit: 1, TargetShift: 6), // mode 2
+ new(Slot: (int)DirectTarget.C, ModeMask: 0xE8, SourceBit: 3, TargetShift: 6), // modes 3, 5, 6, 7
+ new(Slot: (int)DirectTarget.C, ModeMask: 0x20, SourceBit: 2, TargetShift: 7), // mode 5
+ new(Slot: (int)DirectTarget.B0, ModeMask: 0x5B, SourceBit: 0, TargetShift: 6), // modes 0, 1, 3, 4, 6
+ new(Slot: (int)DirectTarget.B1, ModeMask: 0x5B, SourceBit: 1, TargetShift: 6), // modes 0, 1, 3, 4, 6
+ new(Slot: (int)DirectTarget.B0, ModeMask: 0x12, SourceBit: 2, TargetShift: 7), // modes 1, 4
+ new(Slot: (int)DirectTarget.B1, ModeMask: 0x12, SourceBit: 3, TargetShift: 7), // modes 1, 4
+ new(Slot: (int)DirectTarget.D0, ModeMask: 0xAF, SourceBit: 4, TargetShift: 5), // modes 0, 1, 2, 3, 5, 7
+ new(Slot: (int)DirectTarget.D1, ModeMask: 0xAF, SourceBit: 5, TargetShift: 5), // modes 0, 1, 2, 3, 5, 7
+ new(Slot: (int)DirectTarget.D0, ModeMask: 0x05, SourceBit: 2, TargetShift: 6), // modes 0, 2
+ new(Slot: (int)DirectTarget.D1, ModeMask: 0x05, SourceBit: 3, TargetShift: 6), // modes 0, 2
+ ];
+#pragma warning restore SA1201
+
+ /// <summary>
+ /// Applies a mode-gated bit-placement table. For each row, if the current one-hot mode
+ /// shares a set bit with the row's <c>ModeMask</c>, the bit at the row's source index is
+ /// OR'd into <paramref name="targets"/>[p.Slot] at the row's target shift.
+ /// </summary>
+ /// <param name="placements">The table rows to apply (constant per decoder).</param>
+ /// <param name="oneHotMode"><c>1 &lt;&lt; modeValue</c> — the one-hot mode selector.</param>
+ /// <param name="sourceBits">The per-bit source values extracted from the v-inputs.</param>
+ /// <param name="targets">The output slots; each entry is OR'd in place.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void ApplyBitPlacements(
+ ReadOnlySpan placements,
+ int oneHotMode,
+ ReadOnlySpan sourceBits,
+ Span targets)
+ {
+ foreach (BitPlacement p in placements)
+ {
+ if ((oneHotMode & p.ModeMask) != 0) // The row applies when the current mode's bit is set in its mask.
+ {
+ targets[p.Slot] |= sourceBits[p.SourceBit] << p.TargetShift; // Existing bits of the slot are preserved; only the new bit is added.
+ }
+ }
+ }
+
+ /// <summary>
+ /// Swaps the R/G/B channels of a 12-bit HDR endpoint pair according to
+ /// <paramref name="majorComponent"/> (ASTC spec §C.2.14) and shifts each channel left
+ /// by 4 to produce the FP16 bit patterns stored in the returned <see cref="Rgba64"/>
+ /// pair; alpha is set to <c>Fp16.One</c>.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static (Rgba64 Low, Rgba64 High) PackHdrRgbPairWithSwap(
+ int red0,
+ int green0,
+ int blue0,
+ int red1,
+ int green1,
+ int blue1,
+ int majorComponent)
+ {
+ (red0, green0, blue0, red1, green1, blue1) = majorComponent switch
+ {
+ 1 => (green0, red0, blue0, green1, red1, blue1), // Exchange red and green on both endpoints.
+ 2 => (blue0, green0, red0, blue1, green1, red1), // Exchange red and blue on both endpoints.
+ _ => (red0, green0, blue0, red1, green1, blue1) // 0 or any other value: channels stay as passed.
+ };
+
+ Rgba64 low = new((ushort)(red0 << 4), (ushort)(green0 << 4), (ushort)(blue0 << 4), Fp16.One); // 12-bit → 16-bit; callers pass values already clamped to be non-negative.
+ Rgba64 high = new((ushort)(red1 << 4), (ushort)(green1 << 4), (ushort)(blue1 << 4), Fp16.One);
+ return (low, high);
+ }
+
+ /// <summary>
+ /// Decodes HDR endpoints from already-unquantized values. Called from the fused decode
+ /// path where BISE decode + batch unquantize have already been performed.
+ /// </summary>
+ public static (Rgba64 Low, Rgba64 High) DecodeHdrModeUnquantized(ReadOnlySpan value, ColorEndpointMode mode) => mode switch
+ {
+ ColorEndpointMode.HdrLumaLargeRange => UnpackHdrLuminanceLargeRangeCore(value[0], value[1]), // CEM 2: reads 2 values.
+ ColorEndpointMode.HdrLumaSmallRange => UnpackHdrLuminanceSmallRangeCore(value[0], value[1]), // CEM 3: reads 2 values.
+ ColorEndpointMode.HdrRgbBaseScale => UnpackHdrRgbBaseScaleCore(value[0], value[1], value[2], value[3]), // CEM 7: reads 4 values.
+ ColorEndpointMode.HdrRgbDirect => UnpackHdrRgbDirectCore(value[0], value[1], value[2], value[3], value[4], value[5]), // CEM 11: reads 6 values.
+ ColorEndpointMode.HdrRgbDirectLdrAlpha => UnpackHdrRgbDirectLdrAlphaCore(value), // CEM 14: reads 8 values.
+ ColorEndpointMode.HdrRgbDirectHdrAlpha => UnpackHdrRgbDirectHdrAlphaCore(value), // CEM 15: reads 8 values.
+ _ => throw new InvalidOperationException($"Mode {mode} is not an HDR mode") // Any LDR or out-of-range mode is a caller error.
+ };
+
+ /// <summary>
+ /// Left-shifts a possibly negative value by shifting its unsigned bit pattern and reinterpreting the result as signed.
+ /// NOTE(review): C# already defines <c>&lt;&lt;</c> for negative int operands, so this mirrors a C-reference precaution rather than avoiding undefined behavior; the uint cast assumes an unchecked context.
+ /// </summary>
+ private static int SafeSignedLeftShift(int value, int shift) => (int)((uint)value << shift);
+
+ /// <summary>
+ /// Decodes the HDR luminance large-range endpoint pair (CEM 2) per ASTC spec §C.2.14.
+ /// </summary>
+ private static (Rgba64 Low, Rgba64 High) UnpackHdrLuminanceLargeRangeCore(int v0, int v1)
+ {
+ int y0, y1;
+ if (v1 >= v0)
+ {
+ y0 = v0 << 4; // Inputs already ordered: plain expansion by a left shift of 4.
+ y1 = v1 << 4;
+ }
+ else
+ {
+ y0 = (v1 << 4) + 8; // Inputs reversed: the smaller value (v1) becomes the low endpoint, offset by +8.
+ y1 = (v0 << 4) - 8; // ...and the larger value (v0) becomes the high endpoint, offset by -8.
+ }
+
+ Rgba64 low = new((ushort)(y0 << 4), (ushort)(y0 << 4), (ushort)(y0 << 4), Fp16.One); // Luma replicated to R, G, B; shifted by a further 4 bits for storage.
+ Rgba64 high = new((ushort)(y1 << 4), (ushort)(y1 << 4), (ushort)(y1 << 4), Fp16.One);
+ return (low, high);
+ }
+
+ /// <summary>
+ /// Decodes the HDR luminance small-range endpoint pair (CEM 3) per ASTC spec §C.2.14.
+ /// </summary>
+ private static (Rgba64 Low, Rgba64 High) UnpackHdrLuminanceSmallRangeCore(int v0, int v1)
+ {
+ int y0, y1;
+ if ((v0 & 0x80) != 0) // Top bit of v0 selects between the two bit layouts.
+ {
+ y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2); // Base: top 3 bits of v1 above the low 7 bits of v0.
+ y1 = (v1 & 0x1F) << 2; // Offset: low 5 bits of v1 (added to the base below).
+ }
+ else
+ {
+ y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1); // Base: top 4 bits of v1 above the low 7 bits of v0.
+ y1 = (v1 & 0x0F) << 1; // Offset: low 4 bits of v1 (added to the base below).
+ }
+
+ y1 += y0; // High endpoint = base + offset.
+ if (y1 > 0xFFF)
+ {
+ y1 = 0xFFF; // Saturate at the 12-bit maximum.
+ }
+
+ Rgba64 low = new((ushort)(y0 << 4), (ushort)(y0 << 4), (ushort)(y0 << 4), Fp16.One); // Luma replicated to R, G, B; 12-bit → 16-bit.
+ Rgba64 high = new((ushort)(y1 << 4), (ushort)(y1 << 4), (ushort)(y1 << 4), Fp16.One);
+ return (low, high);
+ }
+
+ /// <summary>
+ /// Decodes the HDR RGB base+scale endpoint pair (CEM 7) per ASTC spec §C.2.14. Selects
+ /// one of six sub-modes from the high bits of v0/v1/v2, then re-routes individual bits
+ /// into the four 12-bit output slots via <see cref="BaseScalePlacements"/> and applies
+ /// the major-component channel swap.
+ /// </summary>
+ private static (Rgba64 Low, Rgba64 High) UnpackHdrRgbBaseScaleCore(int v0, int v1, int v2, int v3)
+ {
+ int modeValue = ((v0 & 0xC0) >> 6) | (((v1 & 0x80) >> 7) << 2) | (((v2 & 0x80) >> 7) << 3); // 4-bit selector: v0[7:6] → bits 0-1, v1[7] → bit 2, v2[7] → bit 3.
+
+ (int majorComponent, int mode) = modeValue switch
+ {
+ _ when (modeValue & 0xC) != 0xC => (modeValue >> 2, modeValue & 3), // Sub-modes 0-3: the upper two selector bits give the major component.
+ not 0xF => (modeValue & 3, 4), // Sub-mode 4: the lower two selector bits give the major component.
+ _ => (0, 5) // Selector 0xF: sub-mode 5, no channel swap.
+ };
+
+ // Targets indexed by BaseScaleTarget: [Red, Green, Blue, Scale].
+ Span targets =
+ [
+ v0 & 0x3F,
+ v1 & 0x1F,
+ v2 & 0x1F,
+ v3 & 0x1F,
+ ];
+
+ Span sourceBits =
+ [
+ (v1 >> 6) & 1,
+ (v1 >> 5) & 1,
+ (v2 >> 6) & 1,
+ (v2 >> 5) & 1,
+ (v3 >> 7) & 1,
+ (v3 >> 6) & 1,
+ (v3 >> 5) & 1,
+ ];
+
+ ApplyBitPlacements(BaseScalePlacements, oneHotMode: 1 << mode, sourceBits, targets);
+
+ int red = targets[(int)BaseScaleTarget.Red];
+ int green = targets[(int)BaseScaleTarget.Green];
+ int blue = targets[(int)BaseScaleTarget.Blue];
+ int scale = targets[(int)BaseScaleTarget.Scale];
+
+ int shiftAmount = BaseScaleShiftByMode[mode]; // Per-sub-mode left shift that expands the assembled values.
+ red <<= shiftAmount;
+ green <<= shiftAmount;
+ blue <<= shiftAmount;
+ scale <<= shiftAmount;
+
+ if (mode != 5) // In sub-modes 0-4 green and blue are stored as differences from red.
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ // Low endpoint = base minus scale; clamp negatives to zero before channel swap.
+ int red0 = Math.Max(red - scale, 0);
+ int green0 = Math.Max(green - scale, 0);
+ int blue0 = Math.Max(blue - scale, 0);
+ int red1 = Math.Max(red, 0);
+ int green1 = Math.Max(green, 0);
+ int blue1 = Math.Max(blue, 0);
+
+ return PackHdrRgbPairWithSwap(red0, green0, blue0, red1, green1, blue1, majorComponent);
+ }
+
+ /// <summary>
+ /// Decodes the HDR RGB direct endpoint pair (CEM 11) per ASTC spec §C.2.14. Selects
+ /// one of eight sub-modes (modeValue 0..7) from high bits of the v-inputs, then routes
+ /// per-bit data through <see cref="DirectPlacements"/>, decodes deltas and majorness,
+ /// and produces the channel-swapped 12-bit endpoint pair.
+ /// </summary>
+ private static (Rgba64 Low, Rgba64 High) UnpackHdrRgbDirectCore(int v0, int v1, int v2, int v3, int v4, int v5)
+ {
+ int modeValue = ((v1 & 0x80) >> 7) | (((v2 & 0x80) >> 7) << 1) | (((v3 & 0x80) >> 7) << 2); // 3-bit selector from the top bits of v1, v2, v3.
+ int majorComponent = ((v4 & 0x80) >> 7) | (((v5 & 0x80) >> 7) << 1); // 2-bit value from the top bits of v4, v5.
+
+ // majorComponent == 3: skip bit-placement tree and use direct passthrough of v0..v5.
+ if (majorComponent == 3)
+ {
+ Rgba64 passthroughLow = new((ushort)(v0 << 8), (ushort)(v2 << 8), (ushort)((v4 & 0x7F) << 9), Fp16.One); // Even-indexed inputs → low endpoint; blue drops its top (selector) bit.
+ Rgba64 passthroughHigh = new((ushort)(v1 << 8), (ushort)(v3 << 8), (ushort)((v5 & 0x7F) << 9), Fp16.One); // Odd-indexed inputs → high endpoint.
+ return (passthroughLow, passthroughHigh);
+ }
+
+ // Targets indexed by DirectTarget: [A, B0, B1, C, D0, D1].
+ Span targets =
+ [
+ v0 | ((v1 & 0x40) << 2),
+ v2 & 0x3F,
+ v3 & 0x3F,
+ v1 & 0x3F,
+ v4 & 0x7F,
+ v5 & 0x7F,
+ ];
+
+ Span sourceBits =
+ [
+ (v2 >> 6) & 1,
+ (v3 >> 6) & 1,
+ (v4 >> 6) & 1,
+ (v5 >> 6) & 1,
+ (v4 >> 5) & 1,
+ (v5 >> 5) & 1,
+ ];
+
+ ApplyBitPlacements(DirectPlacements, oneHotMode: 1 << modeValue, sourceBits, targets);
+
+ int a = targets[(int)DirectTarget.A];
+ int b0 = targets[(int)DirectTarget.B0];
+ int b1 = targets[(int)DirectTarget.B1];
+ int c = targets[(int)DirectTarget.C];
+ int d0 = targets[(int)DirectTarget.D0];
+ int d1 = targets[(int)DirectTarget.D1];
+
+ // Sign-extend the signed offsets d0, d1 based on mode-specific data-bit width.
+ int dataBits = DirectDataBitsByMode[modeValue];
+ int signExtendShift = 32 - dataBits;
+ d0 = (d0 << signExtendShift) >> signExtendShift; // Shift the field's top bit into bit 31, then arithmetic-shift back to replicate it; bits above dataBits are discarded.
+ d1 = (d1 << signExtendShift) >> signExtendShift;
+
+ // Expand to 12 bits: per ARM reference, shift amount depends on mode.
+ int valueShift = (modeValue >> 1) ^ 3; // modeValue 0-1 → 3, 2-3 → 2, 4-5 → 1, 6-7 → 0.
+ a = SafeSignedLeftShift(a, valueShift);
+ b0 = SafeSignedLeftShift(b0, valueShift);
+ b1 = SafeSignedLeftShift(b1, valueShift);
+ c = SafeSignedLeftShift(c, valueShift);
+ d0 = SafeSignedLeftShift(d0, valueShift);
+ d1 = SafeSignedLeftShift(d1, valueShift);
+
+ // Compose high and low endpoints per ASTC spec §C.2.14, then clamp to [0, 0xFFF].
+ int red1 = Math.Clamp(a, 0, 0xFFF);
+ int green1 = Math.Clamp(a - b0, 0, 0xFFF);
+ int blue1 = Math.Clamp(a - b1, 0, 0xFFF);
+ int red0 = Math.Clamp(a - c, 0, 0xFFF);
+ int green0 = Math.Clamp(a - b0 - c - d0, 0, 0xFFF);
+ int blue0 = Math.Clamp(a - b1 - c - d1, 0, 0xFFF);
+
+ return PackHdrRgbPairWithSwap(red0, green0, blue0, red1, green1, blue1, majorComponent);
+ }
+
+ /// <summary>
+ /// Decodes the CEM 14 endpoint pair (HDR RGB + LDR alpha) per ASTC spec §C.2.14.
+ /// RGB is decoded as for CEM 11; alpha is bit-replicated UNORM16 (the same expansion
+ /// LDR endpoints use, so the HDR pipeline can blend it as if it were HDR).
+ /// </summary>
+ private static (Rgba64 Low, Rgba64 High) UnpackHdrRgbDirectLdrAlphaCore(ReadOnlySpan unquantizedValues)
+ {
+ (Rgba64 rgbLow, Rgba64 rgbHigh) = UnpackHdrRgbDirectCore(unquantizedValues[0], unquantizedValues[1], unquantizedValues[2], unquantizedValues[3], unquantizedValues[4], unquantizedValues[5]);
+
+ ushort alpha0 = (ushort)(unquantizedValues[6] * 257); // x * 257 == (x << 8) | x for an 8-bit x, i.e. 8 → 16 bit replication.
+ ushort alpha1 = (ushort)(unquantizedValues[7] * 257);
+
+ Rgba64 low = new(rgbLow.R, rgbLow.G, rgbLow.B, alpha0); // Keep the decoded RGB, replace only the alpha.
+ Rgba64 high = new(rgbHigh.R, rgbHigh.G, rgbHigh.B, alpha1);
+ return (low, high);
+ }
+
+ /// <summary>
+ /// Decodes the CEM 15 endpoint pair (HDR RGB + HDR alpha) per ASTC spec §C.2.14. RGB comes from values 0-5 (as for CEM 11), alpha from values 6-7.
+ /// </summary>
+ private static (Rgba64 Low, Rgba64 High) UnpackHdrRgbDirectHdrAlphaCore(ReadOnlySpan unquantizedValues)
+ {
+ (Rgba64 rgbLow, Rgba64 rgbHigh) = UnpackHdrRgbDirectCore(unquantizedValues[0], unquantizedValues[1], unquantizedValues[2], unquantizedValues[3], unquantizedValues[4], unquantizedValues[5]);
+
+ (ushort alpha0, ushort alpha1) = UnpackHdrAlpha(unquantizedValues[6], unquantizedValues[7]);
+
+ Rgba64 low = new(rgbLow.R, rgbLow.G, rgbLow.B, alpha0); // Keep the decoded RGB, replace only the alpha.
+ Rgba64 high = new(rgbHigh.R, rgbHigh.G, rgbHigh.B, alpha1);
+ return (low, high);
+ }
+
+ /// <summary>
+ /// Decodes the HDR alpha pair used by CEM 15 per ASTC spec §C.2.14: a 2-bit selector
+ /// from the high bits of v6/v7 picks one of four sub-modes that determine how the low
+ /// 7 bits of each input map to the 12-bit alpha endpoints (a0, a1).
+ /// </summary>
+ private static (ushort Low, ushort High) UnpackHdrAlpha(int v6, int v7)
+ {
+ int selector = ((v6 >> 7) & 1) | ((v7 >> 6) & 2); // Bit 0 = top bit of v6, bit 1 = top bit of v7.
+ v6 &= 0x7F;
+ v7 &= 0x7F;
+
+ int a0, a1;
+
+ if (selector == 3)
+ {
+ // Simple mode: direct 7-bit values shifted to 12-bit
+ a0 = v6 << 5;
+ a1 = v7 << 5;
+ }
+ else
+ {
+ // Complex mode: base + sign-extended offset
+ v6 |= (v7 << (selector + 1)) & 0x780; // Move the upper bits of v7 on top of v6 to widen the base.
+ v7 &= 0x3F >> selector; // Keep only the offset bits that remain in v7.
+ v7 ^= 32 >> selector; // XOR-then-subtract with the sign bit's weight sign-extends the offset.
+ v7 -= 32 >> selector;
+ v6 <<= 4 - selector; // Scale base and offset by the same amount.
+ v7 <<= 4 - selector;
+ v7 += v6; // Second endpoint = base + signed offset.
+
+ if (v7 < 0)
+ {
+ v7 = 0;
+ }
+ else if (v7 > 0xFFF)
+ {
+ v7 = 0xFFF;
+ }
+
+ a0 = v6;
+ a1 = v7;
+ }
+
+ a0 = Math.Clamp(a0, 0, 0xFFF); // Defensive: both branches above already yield values within [0, 0xFFF].
+ a1 = Math.Clamp(a1, 0, 0xFFF);
+
+ return ((ushort)(a0 << 4), (ushort)(a1 << 4)); // 12-bit → 16-bit.
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/Rgba32Extensions.cs b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/Rgba32Extensions.cs
new file mode 100644
index 00000000..996b4976
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/Rgba32Extensions.cs
@@ -0,0 +1,83 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+/// <summary>
+/// ASTC-specific extension methods and helpers for <see cref="Rgba32"/>.
+/// </summary>
+internal static class Rgba32Extensions
+{
+ /// <summary>
+ /// Creates an <see cref="Rgba32"/> from integer values, clamping each channel to [0, 255]. Alpha defaults to 255 when omitted.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Rgba32 ClampedRgba32(int r, int g, int b, int a = byte.MaxValue)
+ => new(
+ (byte)Math.Clamp(r, byte.MinValue, byte.MaxValue),
+ (byte)Math.Clamp(g, byte.MinValue, byte.MaxValue),
+ (byte)Math.Clamp(b, byte.MinValue, byte.MaxValue),
+ (byte)Math.Clamp(a, byte.MinValue, byte.MaxValue));
+
+ /// <summary>
+ /// Gets the rounded arithmetic mean of the R, G, and B channels.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte GetAverage(this Rgba32 color)
+ {
+ int sum = color.R + color.G + color.B;
+ return (byte)(((sum * 256) + 384) / 768); // Equals (sum + 1.5) / 3 in integer math, i.e. sum / 3 rounded to nearest.
+ }
+
+ /// <summary>
+ /// Gets the channel value at the specified index: 0=R, 1=G, 2=B, 3=A.
+ /// </summary>
+ /// <remarks>
+ /// Reads the sequential [R, G, B, A] byte layout of <see cref="Rgba32"/> directly.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int GetChannel(this in Rgba32 color, int i)
+ {
+ if ((uint)i >= 4) // Unsigned compare rejects negative indices and indices above 3 in one test.
+ {
+ throw new ArgumentOutOfRangeException(nameof(i), $"Index must be between 0 and 3. Actual value: {i}.");
+ }
+
+ return Unsafe.Add(ref Unsafe.As(ref Unsafe.AsRef(in color)), i); // Reinterprets the struct as its first channel and offsets by i elements; safe only because i was range-checked above.
+ }
+
+ /// <summary>
+ /// Computes the sum of squared per-channel differences across all four RGBA channels.
+ /// </summary>
+ public static int SquaredError(Rgba32 a, Rgba32 b)
+ {
+ int dr = a.R - b.R;
+ int dg = a.G - b.G;
+ int db = a.B - b.B;
+ int da = a.A - b.A;
+ return (dr * dr) + (dg * dg) + (db * db) + (da * da); // At most 4 * 255 * 255, well within int range.
+ }
+
+ /// <summary>
+ /// Computes the sum of squared per-channel differences for the RGB channels only, ignoring alpha.
+ /// </summary>
+ public static int SquaredErrorRgb(Rgba32 a, Rgba32 b)
+ {
+ int dr = a.R - b.R;
+ int dg = a.G - b.G;
+ int db = a.B - b.B;
+ return (dr * dr) + (dg * dg) + (db * db);
+ }
+
+ /// <summary>
+ /// Returns true if all four channels are within the specified tolerance of the other color. The bound is inclusive and applied per channel.
+ /// </summary>
+ public static bool IsCloseTo(this Rgba32 color, Rgba32 other, int tolerance)
+ => Math.Abs(color.R - other.R) <= tolerance &&
+ Math.Abs(color.G - other.G) <= tolerance &&
+ Math.Abs(color.B - other.B) <= tolerance &&
+ Math.Abs(color.A - other.A) <= tolerance;
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/Rgba64Extensions.cs b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/Rgba64Extensions.cs
new file mode 100644
index 00000000..21a9f557
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/ColorEncoding/Rgba64Extensions.cs
@@ -0,0 +1,39 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+/// <summary>
+/// ASTC-specific extension methods and helpers for <see cref="Rgba64"/>.
+/// </summary>
+internal static class Rgba64Extensions
+{
+    /// <summary>
+    /// Gets the channel value at the specified index: 0=R, 1=G, 2=B, 3=A.
+    /// </summary>
+    /// <remarks>
+    /// Reads the sequential [R, G, B, A] ushort layout of <see cref="Rgba64"/> directly.
+    /// </remarks>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static ushort GetChannel(this in Rgba64 color, int i)
+    {
+        if ((uint)i >= 4) // unsigned compare also rejects negative indices
+        {
+            throw new ArgumentOutOfRangeException(nameof(i), $"Index must be between 0 and 3. Actual value: {i}.");
+        }
+
+        return Unsafe.Add(ref Unsafe.As<Rgba64, ushort>(ref Unsafe.AsRef(in color)), i);
+    }
+
+    /// <summary>
+    /// Returns true if all four channels are within the specified tolerance of the other color.
+    /// </summary>
+    public static bool IsCloseTo(this Rgba64 color, Rgba64 other, int tolerance)
+        => Math.Abs(color.R - other.R) <= tolerance &&
+           Math.Abs(color.G - other.G) <= tolerance &&
+           Math.Abs(color.B - other.B) <= tolerance &&
+           Math.Abs(color.A - other.A) <= tolerance;
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/BitOperations.cs b/src/ImageSharp.Textures/Compression/Astc/Core/BitOperations.cs
new file mode 100644
index 00000000..59ee712e
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/BitOperations.cs
@@ -0,0 +1,48 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+internal static class BitOperations
+{
+ /// <summary>
+ /// Returns <paramref name="length"/> bits of <paramref name="value"/> starting at bit <paramref name="start"/>, right-aligned in a <see cref="UInt128"/>.
+ /// </summary>
+ public static UInt128 GetBits(UInt128 value, int start, int length) => length switch
+ {
+ <= 0 => UInt128.Zero,
+ >= 128 => value >> start,
+ _ => (value >> start) & (UInt128.MaxValue >> (128 - length))
+ };
+
+ /// <summary>
+ /// Returns <paramref name="length"/> bits of <paramref name="value"/> starting at bit <paramref name="start"/>, right-aligned in a ulong.
+ /// </summary>
+ public static ulong GetBits(ulong value, int start, int length) => length switch
+ {
+ <= 0 => 0UL,
+ >= 64 => value >> start,
+ _ => (value >> start) & (ulong.MaxValue >> (64 - length))
+ };
+
+ /// <summary>
+ /// Transfers a few bits of precision from one value to another.
+ /// </summary>
+ /// <remarks>
+ /// The 'bit_transfer_signed' function defined in Section C.2.14 of the ASTC specification.
+ /// </remarks>
+ public static (int A, int B) TransferPrecision(int a, int b)
+ {
+ b >>= 1;
+ b |= a & 0x80; // bit 7 of a becomes bit 7 of b
+ a >>= 1;
+ a &= 0x3F;
+
+ if ((a & 0x20) != 0)
+ {
+ a -= 0x40; // sign-extend the remaining 6-bit value
+ }
+
+ return (a, b);
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/BlockInfo.cs b/src/ImageSharp.Textures/Compression/Astc/Core/BlockInfo.cs
new file mode 100644
index 00000000..d8b3f72e
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/BlockInfo.cs
@@ -0,0 +1,148 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Textures.Compression.Astc.ColorEncoding;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Decoded block-mode metadata for a single 128-bit ASTC block. Populated by the block-mode
+/// parser (produces an instance via <c>BlockModeDecoder.Decode</c>).
+/// </summary>
+internal readonly struct BlockInfo
+{
+ /// <summary>Every ASTC compressed block is exactly 128 bits (16 bytes) regardless of footprint (spec §C.2.4).</summary>
+ public const int SizeInBytes = 16;
+
+ /// <summary>
+ /// Number of output channels per decoded pixel — RGBA in both the LDR (UNORM8) and HDR
+ /// (float32) profiles. Used as a per-texel multiplier to size
+ /// scratch and image buffers.
+ /// </summary>
+ public const int ChannelsPerPixel = 4;
+
+ public BlockInfo(
+ bool isVoidExtent,
+ bool isHdr,
+ WeightGrid weights,
+ int partitionCount,
+ DualPlaneInfo dualPlane,
+ ColorEndpoints colors,
+ EndpointModeBuffer endpointModes)
+ {
+ this.IsValid = true;
+ this.IsVoidExtent = isVoidExtent;
+ this.IsHdr = isHdr;
+ this.Weights = weights;
+ this.PartitionCount = partitionCount;
+ this.DualPlane = dualPlane;
+ this.Colors = colors;
+ this.EndpointModes = endpointModes;
+ }
+
+ private BlockInfo(bool isMalformedVoidExtent)
+ {
+ this.IsValid = false;
+ this.IsVoidExtent = isMalformedVoidExtent;
+ }
+
+ /// <summary>
+ /// Gets a malformed void-extent block (spec §C.2.23 — reserved bits or coordinates
+ /// invalid). <see cref="IsVoidExtent"/> is true, <see cref="IsValid"/> is false, all other properties are default.
+ /// </summary>
+ public static BlockInfo MalformedVoidExtent { get; } = new(isMalformedVoidExtent: true);
+
+ /// <summary>
+ /// Gets a value indicating whether the block is a legal ASTC encoding. False for reserved
+ /// block modes and malformed void-extent blocks (ASTC spec §C.2.10, §C.2.23); both fast and
+ /// general decode paths skip invalid blocks, leaving zeros in the output.
+ /// </summary>
+ public bool IsValid { get; }
+
+ /// <summary>
+ /// Gets a value indicating whether the block is a void-extent (single-colour) block, per
+ /// ASTC spec §C.2.23.
+ /// </summary>
+ public bool IsVoidExtent { get; }
+
+ /// <summary>
+ /// Gets a value indicating whether this block encodes HDR content. For void-extent blocks
+ /// this is the dynamic-range flag at bit 9 of the block mode (FP16 vs UNORM16, ASTC spec
+ /// §C.2.23); for normal blocks it's true if any partition uses an HDR endpoint mode (spec
+ /// §C.2.14: modes 2, 3, 7, 11, 14, 15). Used by the LDR decoder to reject HDR content
+ /// before dispatch per §C.2.19.
+ /// </summary>
+ public bool IsHdr { get; }
+
+ /// <summary>
+ /// Gets the weight-grid metadata: dimensions, BISE range, and packed bit count
+ /// (ASTC spec §C.2.10, §C.2.16).
+ /// </summary>
+ public WeightGrid Weights { get; }
+
+ /// <summary>
+ /// Gets the number of colour-endpoint partitions in the block (1..4, ASTC spec §C.2.10).
+ /// Zero for void-extent blocks, which carry no partitions.
+ /// </summary>
+ public int PartitionCount { get; }
+
+ /// <summary>
+ /// Gets the dual-plane configuration: whether a second weight plane is present and which
+ /// channel it drives (ASTC spec §C.2.20).
+ /// </summary>
+ public DualPlaneInfo DualPlane { get; }
+
+ /// <summary>
+ /// Gets the colour-endpoint bit region — start bit, bit count, BISE range, and value
+ /// count (ASTC spec §C.2.22).
+ /// </summary>
+ public ColorEndpoints Colors { get; }
+
+ /// <summary>
+ /// Gets the per-partition colour endpoint modes (ASTC spec §C.2.11, §C.2.14). Only the
+ /// first <see cref="PartitionCount"/> slots are populated; access via <see cref="GetEndpointMode"/>
+ /// or <see cref="EndpointMode0"/>.
+ /// </summary>
+ public EndpointModeBuffer EndpointModes { get; }
+
+ /// <summary>
+ /// Gets the colour endpoint mode for partition 0 — the only partition for single-partition
+ /// blocks, and a convenience accessor for the fused fast path.
+ /// </summary>
+ public ColorEndpointMode EndpointMode0 => this.EndpointModes[0];
+
+ /// <summary>
+ /// Gets a value indicating whether the block can take the fused fast path:
+ /// single-partition, single-plane, non-void-extent (the common shape per ASTC spec
+ /// §C.2.10, §C.2.20, §C.2.23). Multi-partition, dual-plane, and void-extent blocks fall
+ /// through to the general logical-block pipeline.
+ /// </summary>
+ public bool IsFusable
+ => !this.IsVoidExtent && this.PartitionCount == 1 && !this.DualPlane.Enabled;
+
+ /// <summary>
+ /// Gets the colour endpoint mode for the given partition index. Only the first
+ /// <see cref="PartitionCount"/> slots in <see cref="EndpointModes"/> are populated by
+ /// the block-mode decoder; the trailing slots retain their
+ /// <c>default(ColorEndpointMode)</c> value and reading them would silently return
+ /// that default instead of a decoded mode.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="partition"/> is outside
+ /// [0, <see cref="PartitionCount"/>).
+ /// </exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ColorEndpointMode GetEndpointMode(int partition)
+ => (uint)partition < (uint)this.PartitionCount
+ ? this.EndpointModes[partition]
+ : throw new ArgumentOutOfRangeException(nameof(partition), partition, $"Must be in [0, PartitionCount={this.PartitionCount}).");
+
+ [InlineArray(4)]
+ public struct EndpointModeBuffer
+ {
+#pragma warning disable CS0169, IDE0051, S1144 // Accessed by runtime via [InlineArray]
+ private ColorEndpointMode element0;
+#pragma warning restore CS0169, IDE0051, S1144
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/ColorEndpoints.cs b/src/ImageSharp.Textures/Compression/Astc/Core/ColorEndpoints.cs
new file mode 100644
index 00000000..78c9202a
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/ColorEndpoints.cs
@@ -0,0 +1,10 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Colour-endpoint bit-region metadata (ASTC spec §C.2.22 — colour endpoint range and bit
+/// budget are derived from the remaining-bits computation).
+/// </summary>
+internal readonly record struct ColorEndpoints(int StartBit, int BitCount, int Range, int Count);
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/DecimationInfo.cs b/src/ImageSharp.Textures/Compression/Astc/Core/DecimationInfo.cs
new file mode 100644
index 00000000..dc51355a
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/DecimationInfo.cs
@@ -0,0 +1,39 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Pre-computed weight infill data for a specific (footprint, weightGridX, weightGridY) combination.
+/// Stores bilinear interpolation indices and factors in a transposed layout.
+/// </summary>
+internal sealed class DecimationInfo
+{
+    private readonly int[] weightIndices;
+    private readonly int[] weightFactors;
+
+    // Transposed layout: [contribution * TexelCount + texel]
+    // 4 contributions per texel (bilinear interpolation from weight grid).
+    // For edge texels where some grid points are out of bounds, factor is 0 and index is 0.
+    public DecimationInfo(int texelCount, int[] weightIndices, int[] weightFactors)
+    {
+        this.TexelCount = texelCount;
+        this.weightIndices = weightIndices;
+        this.weightFactors = weightFactors;
+    }
+
+    public int TexelCount { get; }
+
+    /// <summary>
+    /// Gets the per-texel grid-point indices (length 4 * <see cref="TexelCount"/>) in the
+    /// transposed [contribution * TexelCount + texel] layout. Cached and shared across blocks
+    /// that resolve to the same (footprint, weight-grid) pair.
+    /// </summary>
+    public ReadOnlySpan<int> WeightIndices => this.weightIndices;
+
+    /// <summary>
+    /// Gets the per-texel bilinear weight factors (length 4 * <see cref="TexelCount"/>) in
+    /// the same transposed layout as <see cref="WeightIndices"/>.
+    /// </summary>
+    public ReadOnlySpan<int> WeightFactors => this.weightFactors;
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/DecimationTable.cs b/src/ImageSharp.Textures/Compression/Astc/Core/DecimationTable.cs
new file mode 100644
index 00000000..c735f92c
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/DecimationTable.cs
@@ -0,0 +1,169 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Caches pre-computed DecimationInfo tables and provides weight infill.
+/// For each unique (footprint, gridX, gridY) combination, the bilinear interpolation
+/// indices and factors are computed once and reused for every block with that configuration.
+/// Uses a flat array indexed by (footprintType, gridX, gridY) for O(1) lookup.
+/// </summary>
+internal static class DecimationTable
+{
+    // Grid dimensions range from 2 to 12 inclusive
+    private const int GridMin = 2;
+    private const int GridRange = 11; // 12 - 2 + 1
+    private const int FootprintCount = 14;
+    private static readonly DecimationInfo?[] Table = new DecimationInfo?[FootprintCount * GridRange * GridRange];
+
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static DecimationInfo Get(Footprint footprint, int gridX, int gridY)
+    {
+        int index = ((int)footprint.Type * GridRange * GridRange) + ((gridX - GridMin) * GridRange) + (gridY - GridMin);
+
+        // Volatile.Read pairs with the implicit release on CompareExchange to publish the
+        // fully-constructed DecimationInfo. Entries are immutable, so losing the CAS race
+        // is harmless — the caller discards its own instance and uses the winner.
+        DecimationInfo? decimationInfo = Volatile.Read(ref Table[index]);
+        if (decimationInfo is null)
+        {
+            DecimationInfo computed = Compute(footprint.Width, footprint.Height, gridX, gridY);
+            decimationInfo = Interlocked.CompareExchange(ref Table[index], computed, null) ?? computed;
+        }
+
+        return decimationInfo;
+    }
+
+    /// <summary>
+    /// Performs weight infill using pre-computed tables.
+    /// Maps unquantized grid weights to per-texel weights via bilinear interpolation
+    /// with pre-computed indices and factors.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    public static void InfillWeights(ReadOnlySpan<int> gridWeights, DecimationInfo decimationInfo, Span<int> result)
+    {
+        int texelCount = decimationInfo.TexelCount;
+        ReadOnlySpan<int> weightIndices = decimationInfo.WeightIndices;
+        ReadOnlySpan<int> weightFactors = decimationInfo.WeightFactors;
+        int offset1 = texelCount, offset2 = texelCount * 2, offset3 = texelCount * 3;
+
+        for (int i = 0; i < texelCount; i++)
+        {
+            result[i] = (8
+                + (gridWeights[weightIndices[i]] * weightFactors[i])
+                + (gridWeights[weightIndices[offset1 + i]] * weightFactors[offset1 + i])
+                + (gridWeights[weightIndices[offset2 + i]] * weightFactors[offset2 + i])
+                + (gridWeights[weightIndices[offset3 + i]] * weightFactors[offset3 + i])) >> 4;
+        }
+    }
+
+    /// <summary>
+    /// Scale factor for mapping texel index to grid position (ASTC spec §C.2.18)
+    /// </summary>
+    private static int GetScaleFactorD(int blockDimensions) => (1024 + (blockDimensions >> 1)) / (blockDimensions - 1);
+
+    /// <summary>
+    /// Builds the weight-infill lookup for one (footprint, weight-grid) combination.
+    /// For each texel, computes the four surrounding weight-grid indices and bilinear
+    /// interpolation factors (ASTC spec §C.2.18), storing them in parallel transposed
+    /// arrays so that decode can iterate by contribution slot.
+    /// </summary>
+    private static DecimationInfo Compute(int footprintWidth, int footprintHeight, int gridWidth, int gridHeight)
+    {
+        int texelCount = footprintWidth * footprintHeight;
+        int[] indices = new int[4 * texelCount];
+        int[] factors = new int[4 * texelCount];
+
+        int scaleHorizontal = GetScaleFactorD(footprintWidth);
+        int scaleVertical = GetScaleFactorD(footprintHeight);
+        int gridLimit = gridWidth * gridHeight;
+        int maxGridX = gridWidth - 1;
+        int maxGridY = gridHeight - 1;
+
+        int texelIndex = 0;
+        for (int texelY = 0; texelY < footprintHeight; ++texelY)
+        {
+            (int gridRowIndex, int fractionY) = MapTexelToGridAxis(texelY, scaleVertical, maxGridY);
+            for (int texelX = 0; texelX < footprintWidth; ++texelX)
+            {
+                (int gridColIndex, int fractionX) = MapTexelToGridAxis(texelX, scaleHorizontal, maxGridX);
+                StoreTexelContributions(texelIndex, texelCount, indices, factors, gridColIndex, gridRowIndex, fractionX, fractionY, gridWidth, gridLimit);
+                texelIndex++;
+            }
+        }
+
+        return new DecimationInfo(texelCount, indices, factors);
+    }
+
+    /// <summary>
+    /// Maps a texel coordinate along one axis to the (gridIndex, fraction) pair used for
+    /// bilinear interpolation. The grid index is in Q4 fixed-point (top bits) and the
+    /// fraction occupies the low four bits.
+    /// </summary>
+    private static (int GridIndex, int Fraction) MapTexelToGridAxis(int texel, int scale, int maxGrid)
+    {
+        int scaled = scale * texel;
+        int grid = ((scaled * maxGrid) + 32) >> 6;
+        return (grid >> 4, grid & 0xF);
+    }
+
+    /// <summary>
+    /// Computes the four (gridPoint, factor) contributions for one texel and writes them
+    /// into the transposed output arrays. Each contribution slot has <paramref name="texelCount"/>
+    /// entries so lookups at decode time touch contiguous memory per slot.
+    /// Out-of-bounds grid points collapse to index 0 with a zero factor.
+    /// </summary>
+    private static void StoreTexelContributions(
+        int texelIndex,
+        int texelCount,
+        int[] indices,
+        int[] factors,
+        int gridColIndex,
+        int gridRowIndex,
+        int fractionX,
+        int fractionY,
+        int gridWidth,
+        int gridLimit)
+    {
+        int gridPoint0 = gridColIndex + (gridWidth * gridRowIndex);
+        int gridPoint1 = gridPoint0 + 1;
+        int gridPoint2 = gridColIndex + (gridWidth * (gridRowIndex + 1));
+        int gridPoint3 = gridPoint2 + 1;
+
+        int factor3 = ((fractionX * fractionY) + 8) >> 4;
+        int factor2 = fractionY - factor3;
+        int factor1 = fractionX - factor3;
+        int factor0 = 16 - fractionX - fractionY + factor3;
+
+        ClampGridPoint(ref gridPoint0, ref factor0, gridLimit);
+        ClampGridPoint(ref gridPoint1, ref factor1, gridLimit);
+        ClampGridPoint(ref gridPoint2, ref factor2, gridLimit);
+        ClampGridPoint(ref gridPoint3, ref factor3, gridLimit);
+
+        indices[texelIndex] = gridPoint0;
+        indices[texelCount + texelIndex] = gridPoint1;
+        indices[(2 * texelCount) + texelIndex] = gridPoint2;
+        indices[(3 * texelCount) + texelIndex] = gridPoint3;
+
+        factors[texelIndex] = factor0;
+        factors[texelCount + texelIndex] = factor1;
+        factors[(2 * texelCount) + texelIndex] = factor2;
+        factors[(3 * texelCount) + texelIndex] = factor3;
+    }
+
+    /// <summary>
+    /// Replaces an out-of-bounds grid point with a safe dummy index (0) and zeros its
+    /// contribution factor so the corresponding term drops out of the bilinear blend.
+    /// </summary>
+    private static void ClampGridPoint(ref int gridPoint, ref int factor, int gridLimit)
+    {
+        if (gridPoint >= gridLimit)
+        {
+            factor = 0;
+            gridPoint = 0;
+        }
+    }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/DualPlaneInfo.cs b/src/ImageSharp.Textures/Compression/Astc/Core/DualPlaneInfo.cs
new file mode 100644
index 00000000..01732d84
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/DualPlaneInfo.cs
@@ -0,0 +1,10 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Dual-plane configuration (ASTC spec §C.2.20). When <see cref="Enabled"/> is false,
+/// <see cref="Channel"/> is unused.
+/// </summary>
+internal readonly record struct DualPlaneInfo(bool Enabled, int Channel);
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/Footprint.cs b/src/ImageSharp.Textures/Compression/Astc/Core/Footprint.cs
new file mode 100644
index 00000000..64d808b6
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/Footprint.cs
@@ -0,0 +1,58 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Represents the dimensions of an ASTC block footprint.
+/// </summary>
+public readonly record struct Footprint
+{
+ private static readonly Footprint[] All =
+ [
+ new(FootprintType.Footprint4x4, 4, 4),
+ new(FootprintType.Footprint5x4, 5, 4),
+ new(FootprintType.Footprint5x5, 5, 5),
+ new(FootprintType.Footprint6x5, 6, 5),
+ new(FootprintType.Footprint6x6, 6, 6),
+ new(FootprintType.Footprint8x5, 8, 5),
+ new(FootprintType.Footprint8x6, 8, 6),
+ new(FootprintType.Footprint8x8, 8, 8),
+ new(FootprintType.Footprint10x5, 10, 5),
+ new(FootprintType.Footprint10x6, 10, 6),
+ new(FootprintType.Footprint10x8, 10, 8),
+ new(FootprintType.Footprint10x10, 10, 10),
+ new(FootprintType.Footprint12x10, 12, 10),
+ new(FootprintType.Footprint12x12, 12, 12),
+ ];
+
+ private Footprint(FootprintType type, int width, int height)
+ {
+ this.Type = type;
+ this.Width = width;
+ this.Height = height;
+ this.PixelCount = width * height;
+ }
+
+ /// <summary>Gets the block width in texels.</summary>
+ public int Width { get; }
+
+ /// <summary>Gets the block height in texels.</summary>
+ public int Height { get; }
+
+ /// <summary>Gets the footprint type enum value.</summary>
+ public FootprintType Type { get; }
+
+ /// <summary>Gets the total number of texels in the block (Width * Height).</summary>
+ public int PixelCount { get; }
+
+ /// <summary>
+ /// Creates a <see cref="Footprint"/> from the specified <see cref="FootprintType"/>.
+ /// </summary>
+ /// <param name="type">The footprint type to create a footprint from.</param>
+ /// <returns>A <see cref="Footprint"/> matching the specified type.</returns>
+ public static Footprint FromFootprintType(FootprintType type)
+ => (uint)type < (uint)All.Length
+ ? All[(int)type]
+ : throw new ArgumentOutOfRangeException(nameof(type), $"Invalid FootprintType: {type}");
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/FootprintType.cs b/src/ImageSharp.Textures/Compression/Astc/Core/FootprintType.cs
new file mode 100644
index 00000000..381d3510
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/FootprintType.cs
@@ -0,0 +1,52 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// The supported ASTC block footprint sizes.
+/// </summary>
+public enum FootprintType
+{
+ /// <summary>4x4 texel block.</summary>
+ Footprint4x4,
+
+ /// <summary>5x4 texel block.</summary>
+ Footprint5x4,
+
+ /// <summary>5x5 texel block.</summary>
+ Footprint5x5,
+
+ /// <summary>6x5 texel block.</summary>
+ Footprint6x5,
+
+ /// <summary>6x6 texel block.</summary>
+ Footprint6x6,
+
+ /// <summary>8x5 texel block.</summary>
+ Footprint8x5,
+
+ /// <summary>8x6 texel block.</summary>
+ Footprint8x6,
+
+ /// <summary>8x8 texel block.</summary>
+ Footprint8x8,
+
+ /// <summary>10x5 texel block.</summary>
+ Footprint10x5,
+
+ /// <summary>10x6 texel block.</summary>
+ Footprint10x6,
+
+ /// <summary>10x8 texel block.</summary>
+ Footprint10x8,
+
+ /// <summary>10x10 texel block.</summary>
+ Footprint10x10,
+
+ /// <summary>12x10 texel block.</summary>
+ Footprint12x10,
+
+ /// <summary>12x12 texel block.</summary>
+ Footprint12x12,
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/Fp16.cs b/src/ImageSharp.Textures/Compression/Astc/Core/Fp16.cs
new file mode 100644
index 00000000..6f62a691
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/Fp16.cs
@@ -0,0 +1,68 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// IEEE 754 half-precision (FP16) constants and helpers used by the HDR decoder.
+/// </summary>
+internal static class Fp16
+{
+    /// <summary>FP16 bit pattern for 1.0 (sign 0, exponent 15, mantissa 0), i.e. 15 &lt;&lt; 10.</summary>
+    public const ushort One = 0x3C00;
+
+    /// <summary>FP16 bit pattern for the largest finite value (sign 0, exponent 30, mantissa all ones).</summary>
+    public const ushort MaxFinite = 0x7BFF;
+
+    /// <summary>
+    /// Converts a 16-bit LNS (Log-Normalized Space) value to a 16-bit SF16 (FP16) bit pattern
+    /// per ASTC spec §C.2.15.
+    /// </summary>
+    /// <remarks>
+    /// The LNS value encodes a 5-bit exponent in the upper bits and an 11-bit mantissa
+    /// in the lower bits. The piecewise-linear mantissa transform (slope 3 / 4 / 5 across
+    /// the [0, 512), [512, 1536), [1536, 2048) intervals) and the +Inf/NaN clamp to
+    /// <see cref="MaxFinite"/> are taken verbatim from §C.2.15.
+    /// </remarks>
+    public static ushort FromLns(int lns)
+    {
+        int mantissaComponent = lns & 0x7FF; // Lower 11 bits: mantissa component
+        int exponentComponent = (lns >> 11) & 0x1F; // Upper 5 bits: exponent component
+
+        // Spec §C.2.15: piecewise-linear log approximation, inflection at M = 512 and M = 1536.
+        int mantissaTransformed;
+        if (mantissaComponent < 512)
+        {
+            mantissaTransformed = mantissaComponent * 3;
+        }
+        else if (mantissaComponent < 1536)
+        {
+            mantissaTransformed = (mantissaComponent * 4) - 512;
+        }
+        else
+        {
+            mantissaTransformed = (mantissaComponent * 5) - 2048;
+        }
+
+        int result = (exponentComponent << 10) | (mantissaTransformed >> 3);
+        return (ushort)Math.Min(result, MaxFinite);
+    }
+
+    /// <summary>
+    /// Decodes a 16-bit LNS value to a single-precision float by converting through FP16,
+    /// per ASTC spec §C.2.15. The LNS value is passed through <see cref="FromLns"/>, reinterpreted
+    /// as FP16 bits, and widened to <see cref="float"/>.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static float LnsToFloat(int lns) => (float)BitConverter.UInt16BitsToHalf(FromLns(lns));
+
+    /// <summary>
+    /// Widens an FP16 bit pattern (already in SF16 form, no LNS conversion) to <see cref="float"/>.
+    /// Used for HDR void-extent blocks (ASTC spec §C.2.23), whose channel values are stored as
+    /// FP16 bit patterns directly rather than as LNS values.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static float Fp16ToFloat(ushort fp16Bits) => (float)BitConverter.UInt16BitsToHalf(fp16Bits);
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/Interpolation.cs b/src/ImageSharp.Textures/Compression/Astc/Core/Interpolation.cs
new file mode 100644
index 00000000..8d9264dd
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/Interpolation.cs
@@ -0,0 +1,57 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Scalar weighted-blend primitives from ASTC spec §C.2.19 (Weight Application),
+/// shared by the fused fast paths and the general LogicalBlock pipeline.
+/// The weight is in the 6-bit range [0, 64]; callers pre-unquantise per §C.2.17.
+/// </summary>
+internal static class Interpolation
+{
+ /// <summary>
+ /// Weighted blend of two values with the ASTC rounding convention from §C.2.19:
+ /// <c>(p0 * (64 - weight) + p1 * weight + 32) / 64</c>.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int BlendWeighted(int p0, int p1, int weight)
+ => ((p0 * (64 - weight)) + (p1 * weight) + 32) / 64;
+
+ /// <summary>
+ /// LDR-to-UNORM16 blend: each 8-bit endpoint is bit-replicated to 16 bits
+ /// (<c>(p &lt;&lt; 8) | p</c>) per §C.2.19 before the weighted blend. Every LDR decode
+ /// path that produces 16-bit intermediate values goes through this primitive.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int BlendLdrReplicated(int p0, int p1, int weight)
+ => BlendWeighted((p0 << 8) | p0, (p1 << 8) | p1, weight);
+
+ /// <summary>
+ /// Normalises a UNORM16 value (clamped to [0, 0xFFFF]) to the [0.0, 1.0] float range.
+ /// Used by the HDR output path when an LDR endpoint or mode-14 LDR alpha (ASTC spec §C.2.14)
+ /// has already been interpolated as an integer.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float Unorm16ToFloat(int interpolated)
+ => Math.Clamp(interpolated, 0, 0xFFFF) / 65535.0f;
+
+ /// <summary>
+ /// <see cref="BlendLdrReplicated"/> followed by clamp-to-UNORM16 — the LDR-channel
+ /// interpolation path used by the HDR output writer (ASTC spec §C.2.19).
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort BlendLdrReplicatedAsUnorm16(int p0, int p1, int weight)
+ => (ushort)Math.Clamp(BlendLdrReplicated(p0, p1, weight), 0, 0xFFFF);
+
+ /// <summary>
+ /// <see cref="BlendWeighted"/> followed by clamp-to-UNORM16 — the HDR-channel
+ /// interpolation path. HDR endpoints are already 16-bit values (FP16 bit patterns), so
+ /// no 8→16 expansion is needed.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort BlendWeightedAsUnorm16(int p0, int p1, int weight)
+ => (ushort)Math.Clamp(BlendWeighted(p0, p1, weight), 0, 0xFFFF);
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/Partition.cs b/src/ImageSharp.Textures/Compression/Astc/Core/Partition.cs
new file mode 100644
index 00000000..a4231993
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/Partition.cs
@@ -0,0 +1,215 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Collections.Concurrent;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+internal sealed class Partition
+{
+    private static readonly ConcurrentDictionary<(FootprintType, int, int), Partition> PartitionCache = new();
+    private static readonly ConcurrentDictionary<FootprintType, Partition> SinglePartitionCache = new();
+
+    private readonly int[] assignment;
+
+    private Partition(int[] assignment) => this.assignment = assignment;
+
+    /// <summary>
+    /// Gets the per-texel partition-subset map (length <see cref="Footprint.PixelCount"/>).
+    /// Cached and shared across blocks that resolve to the same partition.
+    /// </summary>
+    public ReadOnlySpan<int> Assignment => this.assignment;
+
+    /// <summary>
+    /// Returns the shared single-partition assignment for the given footprint. Every texel is
+    /// assigned to subset 0, so one zero-filled <see cref="Partition"/> array is reused across
+    /// all callers (void-extent blocks and single-partition logical-path blocks).
+    /// </summary>
+    public static Partition GetSinglePartition(Footprint footprint)
+        => SinglePartitionCache.GetOrAdd(
+            footprint.Type,
+            static (_, fp) => new Partition(new int[fp.PixelCount]),
+            footprint);
+
+    public static Partition GetASTCPartition(Footprint footprint, int partitionCount, int partitionId)
+        => PartitionCache.GetOrAdd(
+            (footprint.Type, partitionCount, partitionId),
+            static (key, fp) => Build(fp, key.Item2, key.Item3),
+            footprint);
+
+    private static Partition Build(Footprint footprint, int partitionCount, int partitionId)
+    {
+        int w = footprint.Width;
+        int h = footprint.Height;
+        int[] assignment = new int[w * h];
+        int idx = 0;
+        for (int y = 0; y < h; ++y)
+        {
+            for (int x = 0; x < w; ++x)
+            {
+                assignment[idx++] = SelectASTCPartition(partitionId, x, y, 0, partitionCount, footprint.PixelCount);
+            }
+        }
+
+        return new Partition(assignment);
+    }
+
+    /// <summary>
+    /// Computes the partition index (0..<paramref name="partitionCount"/>-1) for a texel at
+    /// (<paramref name="x"/>, <paramref name="y"/>, <paramref name="z"/>) given the block's
+    /// 10-bit partition <paramref name="seed"/>. Implements ASTC spec §C.2.21's partition
+    /// selection hash: a PRNG scrambles the seed, then 12 small seeds weight the texel
+    /// coordinates into four candidate values whose largest wins.
+    /// </summary>
+    private static int SelectASTCPartition(int seed, int x, int y, int z, int partitionCount, int pixelCount)
+    {
+        if (partitionCount <= 1)
+        {
+            return 0;
+        }
+
+        // Small footprints (< 31 texels) have all coordinates doubled so neighbouring texels
+        // spread further through the hash and avoid degenerate single-partition patterns.
+        if (pixelCount < 31)
+        {
+            x <<= 1;
+            y <<= 1;
+            z <<= 1;
+        }
+
+        uint randomNumber = ScrambleSeed(seed, partitionCount);
+
+        // Fixed 12 uints (48 bytes) — partition hash uses 12 4-bit sub-seeds per spec §C.2.21.
+        Span<uint> subseeds = stackalloc uint[12];
+        ExtractSubSeeds(randomNumber, subseeds);
+        ShiftSubSeeds(subseeds, seed, partitionCount);
+
+        (int a, int b, int c, int d) = MixSubSeedsWithCoords(subseeds, randomNumber, x, y, z);
+        return SelectPartitionFromCandidates(a, b, c, d, partitionCount);
+    }
+
+    /// <summary>
+    /// Applies the 10-step PRNG scramble from ASTC spec §C.2.21 Listing 11 to the 10-bit
+    /// seed offset by <c>(partitionCount - 1) * 1024</c>.
+    /// </summary>
+    private static uint ScrambleSeed(int seed, int partitionCount)
+    {
+        uint random = (uint)(seed + ((partitionCount - 1) * 1024));
+        random ^= random >> 15;
+        random -= random << 17;
+        random += random << 7;
+        random += random << 4;
+        random ^= random >> 5;
+        random += random << 16;
+        random ^= random >> 7;
+        random ^= random >> 3;
+        random ^= random << 6;
+        random ^= random >> 17;
+        return random;
+    }
+
+    /// <summary>
+    /// Extracts the 12 4-bit sub-seeds from the scrambled number per ASTC spec §C.2.21
+    /// and squares each. The squaring biases the distribution so small values stay small
+    /// and large values become dominant.
+    /// </summary>
+    private static void ExtractSubSeeds(uint random, Span<uint> subseeds)
+    {
+        subseeds[0] = random & 0xF;
+        subseeds[1] = (random >> 4) & 0xF;
+        subseeds[2] = (random >> 8) & 0xF;
+        subseeds[3] = (random >> 12) & 0xF;
+        subseeds[4] = (random >> 16) & 0xF;
+        subseeds[5] = (random >> 20) & 0xF;
+        subseeds[6] = (random >> 24) & 0xF;
+        subseeds[7] = (random >> 28) & 0xF;
+        subseeds[8] = (random >> 18) & 0xF;
+        subseeds[9] = (random >> 22) & 0xF;
+        subseeds[10] = (random >> 26) & 0xF;
+        subseeds[11] = ((random >> 30) | (random << 2)) & 0xF;
+
+        for (int i = 0; i < 12; ++i)
+        {
+            subseeds[i] *= subseeds[i];
+        }
+    }
+
+    /// <summary>
+    /// Right-shifts each sub-seed by one of three mode-dependent shift amounts (sh1, sh2, sh3)
+    /// per ASTC spec §C.2.21. The shift choice is driven by low-order bits of the original
+    /// seed together with the partition count.
+    /// </summary>
+    private static void ShiftSubSeeds(Span<uint> subseeds, int seed, int partitionCount)
+    {
+        int sh1, sh2;
+        if ((seed & 1) != 0)
+        {
+            sh1 = (seed & 2) != 0 ? 4 : 5;
+            sh2 = partitionCount == 3 ? 6 : 5;
+        }
+        else
+        {
+            sh1 = partitionCount == 3 ? 6 : 5;
+            sh2 = (seed & 2) != 0 ? 4 : 5;
+        }
+
+        int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
+
+        subseeds[0] >>= sh1;
+        subseeds[1] >>= sh2;
+        subseeds[2] >>= sh1;
+        subseeds[3] >>= sh2;
+        subseeds[4] >>= sh1;
+        subseeds[5] >>= sh2;
+        subseeds[6] >>= sh1;
+        subseeds[7] >>= sh2;
+        subseeds[8] >>= sh3;
+        subseeds[9] >>= sh3;
+        subseeds[10] >>= sh3;
+        subseeds[11] >>= sh3;
+    }
+
+    /// <summary>
+    /// Computes the four candidate values a, b, c, d as weighted combinations of the texel
+    /// coordinates with sub-seeds as weights, plus the scrambled-number shifted by a
+    /// candidate-specific amount. Low six bits are retained per ASTC spec §C.2.21.
+    /// </summary>
+    private static (int A, int B, int C, int D) MixSubSeedsWithCoords(ReadOnlySpan<uint> subseeds, uint random, int x, int y, int z)
+    {
+        int a = (int)((subseeds[0] * x) + (subseeds[1] * y) + (subseeds[10] * z) + (random >> 14));
+        int b = (int)((subseeds[2] * x) + (subseeds[3] * y) + (subseeds[11] * z) + (random >> 10));
+        int c = (int)((subseeds[4] * x) + (subseeds[5] * y) + (subseeds[8] * z) + (random >> 6));
+        int d = (int)((subseeds[6] * x) + (subseeds[7] * y) + (subseeds[9] * z) + (random >> 2));
+        return (a & 0x3F, b & 0x3F, c & 0x3F, d & 0x3F);
+    }
+
+    /// <summary>
+    /// Returns the index of the largest of a, b, c, d after zeroing the unused ones based on
+    /// <paramref name="partitionCount"/>. Ties prefer the lower index (matches ASTC spec
+    /// §C.2.21's cascade of ≥ comparisons).
+    /// </summary>
+    private static int SelectPartitionFromCandidates(int a, int b, int c, int d, int partitionCount)
+    {
+        if (partitionCount <= 3)
+        {
+            d = 0;
+        }
+
+        if (partitionCount <= 2)
+        {
+            c = 0;
+        }
+
+        if (a >= b && a >= c && a >= d)
+        {
+            return 0;
+        }
+
+        if (b >= c && b >= d)
+        {
+            return 1;
+        }
+
+        return c >= d ? 2 : 3;
+    }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/SimdHelpers.cs b/src/ImageSharp.Textures/Compression/Astc/Core/SimdHelpers.cs
new file mode 100644
index 00000000..f92b02b7
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/SimdHelpers.cs
@@ -0,0 +1,140 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+internal static class SimdHelpers
+{
+ private static readonly Vector128 Vec32 = Vector128.Create(32);
+ private static readonly Vector128 Vec64 = Vector128.Create(64);
+ private static readonly Vector128 Vec255 = Vector128.Create(255);
+
+ /// <summary>
+ /// Interpolates one channel for 4 pixels simultaneously.
+ /// All 4 pixels share the same endpoint values but have different weights.
+ /// Returns the 4 results, one in the low 8 bits of each element of a <see cref="Vector128{T}"/>.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128 Interpolate4ChannelPixels(int p0, int p1, Vector128 weights)
+ {
+ // Bit-replicate endpoint bytes to 16-bit
+ Vector128 c0 = Vector128.Create((p0 << 8) | p0);
+ Vector128 c1 = Vector128.Create((p1 << 8) | p1);
+
+ // c = (c0 * (64 - w) + c1 * w + 32) >> 6
+ // NOTE: Using >> 6 instead of / 64 because Vector128 division
+ // has no hardware support and decomposes to scalar operations.
+ Vector128 w64 = Vec64 - weights;
+ Vector128 c = ((c0 * w64) + (c1 * weights) + Vec32) >> 6;
+
+ // Spec §C.2.19 (Weight Application): for LDR-mode UNORM8 output the final
+ // 8-bit result is the top 8 bits of the UNORM16 interpolation. Mask
+ // to [0, 255] to defend against malformed endpoints producing c outside
+ // [0, 0xFFFF]; well-formed input is already in range.
+ return (c >>> 8) & Vec255;
+ }
+
+ /// <summary>
+ /// Writes 4 LDR pixels (16 bytes) to the output buffer at the given offset using SIMD.
+ /// Processes each channel across 4 pixels in parallel, then interleaves to RGBA output.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void Write4PixelLdr(
+ Span output,
+ int offset,
+ int lowR,
+ int lowG,
+ int lowB,
+ int lowA,
+ int highR,
+ int highG,
+ int highB,
+ int highA,
+ Vector128 weights)
+ {
+ Vector128 r = Interpolate4ChannelPixels(lowR, highR, weights);
+ Vector128 g = Interpolate4ChannelPixels(lowG, highG, weights);
+ Vector128 b = Interpolate4ChannelPixels(lowB, highB, weights);
+ Vector128 a = Interpolate4ChannelPixels(lowA, highA, weights);
+
+ // Pack 4 RGBA pixels into 16 bytes via vector OR+shift.
+ // Each int element has its channel value in bits [0:7].
+ // Combine: element[i] = R[i] | (G[i] << 8) | (B[i] << 16) | (A[i] << 24)
+ // On little-endian, storing this int32 writes bytes [R, G, B, A].
+ Vector128 rgba = r | (g << 8) | (b << 16) | (a << 24);
+ rgba.AsByte().CopyTo(output.Slice(offset, 16));
+ }
+
+ /// <summary>
+ /// Scalar single-pixel LDR interpolation, writing the 4 channel bytes (R, G, B, A)
+ /// directly to the buffer at the given offset. No Rgba32 allocation.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void WriteSinglePixelLdr(
+ Span output,
+ int offset,
+ int lowR,
+ int lowG,
+ int lowB,
+ int lowA,
+ int highR,
+ int highG,
+ int highB,
+ int highA,
+ int weight)
+ {
+ output[offset + 0] = (byte)InterpolateChannelScalar(lowR, highR, weight);
+ output[offset + 1] = (byte)InterpolateChannelScalar(lowG, highG, weight);
+ output[offset + 2] = (byte)InterpolateChannelScalar(lowB, highB, weight);
+ output[offset + 3] = (byte)InterpolateChannelScalar(lowA, highA, weight);
+ }
+
+ /// <summary>
+ /// Scalar single-pixel dual-plane LDR interpolation: channel dpChannel uses dpWeight, the others use weight.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void WriteSinglePixelLdrDualPlane(
+ Span output,
+ int offset,
+ int lowR,
+ int lowG,
+ int lowB,
+ int lowA,
+ int highR,
+ int highG,
+ int highB,
+ int highA,
+ int weight,
+ int dpChannel,
+ int dpWeight)
+ {
+ output[offset + 0] = (byte)InterpolateChannelScalar(
+ lowR,
+ highR,
+ dpChannel == 0 ? dpWeight : weight);
+ output[offset + 1] = (byte)InterpolateChannelScalar(
+ lowG,
+ highG,
+ dpChannel == 1 ? dpWeight : weight);
+ output[offset + 2] = (byte)InterpolateChannelScalar(
+ lowB,
+ highB,
+ dpChannel == 2 ? dpWeight : weight);
+ output[offset + 3] = (byte)InterpolateChannelScalar(
+ lowA,
+ highA,
+ dpChannel == 3 ? dpWeight : weight);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal static int InterpolateChannelScalar(int p0, int p1, int weight)
+ {
+ // Spec §C.2.19 (Weight Application): for LDR-mode UNORM8 output the final
+ // 8-bit result is the top 8 bits (bits 8..15) of the UNORM16 interpolation.
+ int c = Interpolation.BlendLdrReplicated(p0, p1, weight);
+ return (c >> 8) & 0xFF;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/UInt128Extensions.cs b/src/ImageSharp.Textures/Compression/Astc/Core/UInt128Extensions.cs
new file mode 100644
index 00000000..64b39e7a
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/UInt128Extensions.cs
@@ -0,0 +1,55 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+internal static class UInt128Extensions
+{
+ /// <summary>
+ /// The lower 64 bits of the value.
+ /// </summary>
+ public static ulong Low(this UInt128 value)
+ => (ulong)(value & 0xFFFFFFFFFFFFFFFFUL);
+
+ /// <summary>
+ /// The upper 64 bits of the value.
+ /// </summary>
+ public static ulong High(this UInt128 value)
+ => (ulong)(value >> 64);
+
+ /// <summary>
+ /// A mask with the lowest n bits set to 1 (zero for n &lt;= 0, all ones for n &gt;= 128).
+ /// </summary>
+ public static UInt128 OnesMask(int n) => n switch
+ {
+ <= 0 => UInt128.Zero,
+ >= 128 => UInt128.MaxValue,
+ _ => UInt128.MaxValue >> (128 - n)
+ };
+
+ /// <summary>
+ /// Reverses bits across the full 128-bit value: each 64-bit half is bit-reversed and
+ /// the halves swap places. Used by the BISE weight decoder (ASTC spec §C.2.12): weight
+ /// data is encoded most-significant-bit-first into the high end of the block, so callers
+ /// reverse the block before reading weights as a normal little-endian sequence.
+ /// </summary>
+ public static UInt128 ReverseBits(this UInt128 value)
+ {
+ ulong revLow = ReverseBits(value.Low());
+ ulong revHigh = ReverseBits(value.High());
+
+ return new UInt128(revLow, revHigh);
+ }
+
+ private static ulong ReverseBits(ulong x)
+ {
+ x = ((x >> 1) & 0x5555555555555555UL) | ((x & 0x5555555555555555UL) << 1);
+ x = ((x >> 2) & 0x3333333333333333UL) | ((x & 0x3333333333333333UL) << 2);
+ x = ((x >> 4) & 0x0F0F0F0F0F0F0F0FUL) | ((x & 0x0F0F0F0F0F0F0F0FUL) << 4);
+ x = ((x >> 8) & 0x00FF00FF00FF00FFUL) | ((x & 0x00FF00FF00FF00FFUL) << 8);
+ x = ((x >> 16) & 0x0000FFFF0000FFFFUL) | ((x & 0x0000FFFF0000FFFFUL) << 16);
+ x = (x >> 32) | (x << 32);
+
+ return x;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/Core/WeightGrid.cs b/src/ImageSharp.Textures/Compression/Astc/Core/WeightGrid.cs
new file mode 100644
index 00000000..57f60220
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/Core/WeightGrid.cs
@@ -0,0 +1,9 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+/// <summary>
+/// Weight grid metadata for a single block (ASTC spec §C.2.7, §C.2.8).
+/// </summary>
+internal readonly record struct WeightGrid(int Width, int Height, int Range, int BitCount);
diff --git a/src/ImageSharp.Textures/Compression/Astc/IO/AstcFile.cs b/src/ImageSharp.Textures/Compression/Astc/IO/AstcFile.cs
new file mode 100644
index 00000000..307f913b
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/IO/AstcFile.cs
@@ -0,0 +1,85 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using SixLabors.ImageSharp.Textures.Compression.Astc.Core;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.IO;
+
+/// <summary>
+/// A very simple format consisting of a small header followed immediately
+/// by the binary payload for a single image surface.
+/// </summary>
+/// <remarks>
+/// See https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md
+/// </remarks>
+internal record AstcFile
+{
+ private readonly AstcFileHeader header;
+ private readonly byte[] blocks;
+
+ internal AstcFile(AstcFileHeader header, byte[] blocks)
+ {
+ this.header = header;
+ this.blocks = blocks;
+ this.Footprint = this.GetFootprint();
+ }
+
+ public ReadOnlySpan Blocks => this.blocks;
+
+ public Footprint Footprint { get; }
+
+ public int Width => this.header.ImageWidth;
+
+ public int Height => this.header.ImageHeight;
+
+ public int Depth => this.header.ImageDepth;
+
+ public static AstcFile FromMemory(byte[] data)
+ {
+ Guard.NotNull(data);
+ Guard.MustBeGreaterThanOrEqualTo(data.Length, AstcFileHeader.SizeInBytes, nameof(data));
+
+ AstcFileHeader header = AstcFileHeader.FromMemory(data.AsSpan(0, AstcFileHeader.SizeInBytes));
+
+ int blockDataLength = data.Length - AstcFileHeader.SizeInBytes;
+ Guard.IsTrue(blockDataLength % BlockInfo.SizeInBytes == 0, nameof(data), "ASTC block data length must be a multiple of the block size.");
+
+ int blocksWide = (header.ImageWidth + header.BlockWidth - 1) / header.BlockWidth;
+ int blocksHigh = (header.ImageHeight + header.BlockHeight - 1) / header.BlockHeight;
+ long expectedBlockCount = (long)blocksWide * blocksHigh;
+ long actualBlockCount = blockDataLength / BlockInfo.SizeInBytes;
+ if (actualBlockCount != expectedBlockCount)
+ {
+ throw new ArgumentOutOfRangeException(
+ nameof(data),
+ $"ASTC payload contains {actualBlockCount} blocks but the header describes {expectedBlockCount}");
+ }
+
+ byte[] blocks = new byte[blockDataLength];
+ Array.Copy(data, AstcFileHeader.SizeInBytes, blocks, 0, blocks.Length);
+
+ return new AstcFile(header, blocks);
+ }
+
+ /// <summary>
+ /// Maps the block dimensions in the header to a Footprint; throws NotSupportedException otherwise.
+ /// </summary>
+ private Footprint GetFootprint() => (this.header.BlockWidth, this.header.BlockHeight) switch
+ {
+ (4, 4) => Footprint.FromFootprintType(FootprintType.Footprint4x4),
+ (5, 4) => Footprint.FromFootprintType(FootprintType.Footprint5x4),
+ (5, 5) => Footprint.FromFootprintType(FootprintType.Footprint5x5),
+ (6, 5) => Footprint.FromFootprintType(FootprintType.Footprint6x5),
+ (6, 6) => Footprint.FromFootprintType(FootprintType.Footprint6x6),
+ (8, 5) => Footprint.FromFootprintType(FootprintType.Footprint8x5),
+ (8, 6) => Footprint.FromFootprintType(FootprintType.Footprint8x6),
+ (8, 8) => Footprint.FromFootprintType(FootprintType.Footprint8x8),
+ (10, 5) => Footprint.FromFootprintType(FootprintType.Footprint10x5),
+ (10, 6) => Footprint.FromFootprintType(FootprintType.Footprint10x6),
+ (10, 8) => Footprint.FromFootprintType(FootprintType.Footprint10x8),
+ (10, 10) => Footprint.FromFootprintType(FootprintType.Footprint10x10),
+ (12, 10) => Footprint.FromFootprintType(FootprintType.Footprint12x10),
+ (12, 12) => Footprint.FromFootprintType(FootprintType.Footprint12x12),
+ _ => throw new NotSupportedException($"Unsupported block dimensions: {this.header.BlockWidth}x{this.header.BlockHeight}"),
+ };
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/IO/AstcFileHeader.cs b/src/ImageSharp.Textures/Compression/Astc/IO/AstcFileHeader.cs
new file mode 100644
index 00000000..f52dfbce
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/IO/AstcFileHeader.cs
@@ -0,0 +1,96 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Buffers.Binary;
+
+namespace SixLabors.ImageSharp.Textures.Compression.Astc.IO;
+
+/// <summary>
+/// The 16 byte ASTC file header
+/// </summary>
+/// <remarks>
+/// ASTC block and decoded image dimensions in texels.
+///
+/// For 2D images the Z dimension must be set to 1.
+///
+/// Note that the image is not required to be an exact multiple of the compressed block
+/// size; the compressed data may include padding that is discarded during decompression.
+/// </remarks>
+internal readonly record struct AstcFileHeader(byte BlockWidth, byte BlockHeight, byte BlockDepth, int ImageWidth, int ImageHeight, int ImageDepth)
+{
+ public const uint Magic = 0x5CA1AB13;
+ public const int SizeInBytes = 16;
+
+ // 2D footprints from the ASTC spec. 3D footprints are not supported.
+ private static readonly (byte Width, byte Height)[] Valid2DFootprints =
+ [
+ (4, 4), (5, 4), (5, 5), (6, 5), (6, 6),
+ (8, 5), (8, 6), (8, 8),
+ (10, 5), (10, 6), (10, 8), (10, 10),
+ (12, 10), (12, 12)
+ ];
+
+ public static AstcFileHeader FromMemory(Span data)
+ {
+ Guard.MustBeSizedAtLeast(data, SizeInBytes, nameof(data));
+
+ // ASTC header is 16 bytes:
+ // - magic (4, little-endian),
+ // - block dimensions x, y, z (1 byte each, 3 total),
+ // - image size x, y, z (3 little-endian bytes each, 9 total)
+ uint magic = BinaryPrimitives.ReadUInt32LittleEndian(data);
+ Guard.IsTrue(magic == Magic, nameof(data), $"Invalid ASTC file magic: expected 0x{Magic:X8}.");
+
+ byte blockWidth = data[4];
+ byte blockHeight = data[5];
+ byte blockDepth = data[6];
+
+ // Only 2D footprints are supported, so block depth must be 1.
+ if (blockDepth != 1)
+ {
+ throw new NotSupportedException($"ASTC 3D block footprints are not supported (block depth = {blockDepth})");
+ }
+
+ if (!IsValid2DFootprint(blockWidth, blockHeight))
+ {
+ throw new NotSupportedException($"Unsupported ASTC block dimensions: {blockWidth}x{blockHeight}");
+ }
+
+ int imageWidth = data[7] | (data[8] << 8) | (data[9] << 16);
+ int imageHeight = data[10] | (data[11] << 8) | (data[12] << 16);
+ int imageDepth = data[13] | (data[14] << 8) | (data[15] << 16);
+
+ Guard.MustBeGreaterThan(imageWidth, 0, nameof(imageWidth));
+ Guard.MustBeGreaterThan(imageHeight, 0, nameof(imageHeight));
+ Guard.MustBeGreaterThan(imageDepth, 0, nameof(imageDepth));
+
+ // Reject sizes whose width * height * 4 bytes (RGBA32) would overflow an int-sized buffer.
+ const int bytesPerPixel = 4;
+ long totalPixels = (long)imageWidth * imageHeight;
+ if (totalPixels > int.MaxValue / bytesPerPixel)
+ {
+ throw new ArgumentOutOfRangeException(nameof(data), "ASTC image dimensions exceed the maximum supported size");
+ }
+
+ return new AstcFileHeader(
+ BlockWidth: blockWidth,
+ BlockHeight: blockHeight,
+ BlockDepth: blockDepth,
+ ImageWidth: imageWidth,
+ ImageHeight: imageHeight,
+ ImageDepth: imageDepth);
+ }
+
+ private static bool IsValid2DFootprint(byte width, byte height)
+ {
+ foreach ((byte w, byte h) in Valid2DFootprints)
+ {
+ if (w == width && h == height)
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+}
diff --git a/src/ImageSharp.Textures/Compression/Astc/KHR_texture_compression_astc_hdr.txt b/src/ImageSharp.Textures/Compression/Astc/KHR_texture_compression_astc_hdr.txt
new file mode 100644
index 00000000..500d3bbe
--- /dev/null
+++ b/src/ImageSharp.Textures/Compression/Astc/KHR_texture_compression_astc_hdr.txt
@@ -0,0 +1,2189 @@
+Name
+
+ KHR_texture_compression_astc_hdr
+
+Name Strings
+
+ GL_KHR_texture_compression_astc_hdr
+ GL_KHR_texture_compression_astc_ldr
+
+Contact
+
+ Sean Ellis (sean.ellis 'at' arm.com)
+ Jon Leech (oddhack 'at' sonic.net)
+
+Contributors
+
+ Sean Ellis, ARM
+ Jorn Nystad, ARM
+ Tom Olson, ARM
+ Andy Pomianowski, AMD
+ Cass Everitt, NVIDIA
+ Walter Donovan, NVIDIA
+ Robert Simpson, Qualcomm
+ Maurice Ribble, Qualcomm
+ Larry Seiler, Intel
+ Daniel Koch, NVIDIA
+ Anthony Wood, Imagination Technologies
+ Jon Leech
+ Andrew Garrard, Samsung
+
+IP Status
+
+ No known issues.
+
+Notice
+
+ Copyright (c) 2012-2016 The Khronos Group Inc. Copyright terms at
+ http://www.khronos.org/registry/speccopyright.html
+
+Specification Update Policy
+
+ Khronos-approved extension specifications are updated in response to
+ issues and bugs prioritized by the Khronos OpenGL and OpenGL ES Working Groups. For
+ extensions which have been promoted to a core Specification, fixes will
+ first appear in the latest version of that core Specification, and will
+ eventually be backported to the extension document. This policy is
+ described in more detail at
+ https://www.khronos.org/registry/OpenGL/docs/update_policy.php
+
+Status
+
+ Complete.
+ Approved by the ARB on 2012/06/18.
+ Approved by the OpenGL ES WG on 2012/06/15.
+ Ratified by the Khronos Board of Promoters on 2012/07/27 (LDR profile).
+ Ratified by the Khronos Board of Promoters on 2013/09/27 (HDR profile).
+
+Version
+
+ Version 8, June 8, 2017
+
+Number
+
+ ARB Extension #118
+ OpenGL ES Extension #117
+
+Dependencies
+
+ Written based on the wording of the OpenGL ES 3.1 (April 29, 2015)
+ Specification
+
+ May be implemented against any version of OpenGL or OpenGL ES supporting
+ compressed textures.
+
+ Some of the functionality of these extensions is not supported if the
+ underlying implementation does not support cube map array textures.
+
+
+Overview
+
+ Adaptive Scalable Texture Compression (ASTC) is a new texture
+ compression technology that offers unprecedented flexibility, while
+ producing better or comparable results than existing texture
+ compressions at all bit rates. It includes support for 2D and
+ slice-based 3D textures, with low and high dynamic range, at bitrates
+ from below 1 bit/pixel up to 8 bits/pixel in fine steps.
+
+ The goal of these extensions is to support the full 2D profile of the
+ ASTC texture compression specification, and allow construction of 3D
+ textures from multiple compressed 2D slices.
+
+ ASTC-compressed textures are handled in OpenGL ES and OpenGL by adding
+ new supported formats to the existing commands for defining and updating
+ compressed textures, and defining the interaction of the ASTC formats
+ with each texture target.
+
+New Procedures and Functions
+
+ None
+
+New Tokens
+
+ Accepted by the <format> parameter of CompressedTexSubImage2D and
+ CompressedTexSubImage3D, and by the <internalformat> parameter of
+ CompressedTexImage2D, CompressedTexImage3D, TexStorage2D,
+ TextureStorage2D, TexStorage3D, and TextureStorage3D:
+
+ COMPRESSED_RGBA_ASTC_4x4_KHR 0x93B0
+ COMPRESSED_RGBA_ASTC_5x4_KHR 0x93B1
+ COMPRESSED_RGBA_ASTC_5x5_KHR 0x93B2
+ COMPRESSED_RGBA_ASTC_6x5_KHR 0x93B3
+ COMPRESSED_RGBA_ASTC_6x6_KHR 0x93B4
+ COMPRESSED_RGBA_ASTC_8x5_KHR 0x93B5
+ COMPRESSED_RGBA_ASTC_8x6_KHR 0x93B6
+ COMPRESSED_RGBA_ASTC_8x8_KHR 0x93B7
+ COMPRESSED_RGBA_ASTC_10x5_KHR 0x93B8
+ COMPRESSED_RGBA_ASTC_10x6_KHR 0x93B9
+ COMPRESSED_RGBA_ASTC_10x8_KHR 0x93BA
+ COMPRESSED_RGBA_ASTC_10x10_KHR 0x93BB
+ COMPRESSED_RGBA_ASTC_12x10_KHR 0x93BC
+ COMPRESSED_RGBA_ASTC_12x12_KHR 0x93BD
+
+ COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR 0x93D0
+ COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR 0x93D1
+ COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR 0x93D2
+ COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR 0x93D3
+ COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR 0x93D4
+ COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR 0x93D5
+ COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR 0x93D6
+ COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR 0x93D7
+ COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR 0x93D8
+ COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR 0x93D9
+ COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR 0x93DA
+ COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR 0x93DB
+ COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR 0x93DC
+ COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR 0x93DD
+
+ If extension "EXT_texture_storage" is supported, these tokens are also
+ accepted by TexStorage2DEXT, TextureStorage2DEXT, TexStorage3DEXT and
+ TextureStorage3DEXT.
+
+Additions to Chapter 8 of the OpenGL ES 3.1 Specification (Textures and Samplers)
+
+ Add to Section 8.7 Compressed Texture Images:
+
+ Modify table 8.19 (Compressed internal formats) to add all the ASTC
+ format tokens in the New Tokens section. The "Base Internal Format"
+ column is RGBA for all ASTC formats.
+
+ Add a new column "Block Width x Height", which is 4x4 for all non-ASTC
+ formats in the table, and matches the size in the token name for ASTC
+ formats (e.g. COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR has a block size of
+ 10 x 8).
+
+ Add a second new column "3D Tex." which is empty for all non-ASTC
+ formats. If only the LDR profile is supported by the implementation,
+ this column is also empty for all ASTC formats. If both the LDR and HDR
+ profiles are supported, this column is checked for all ASTC formats.
+
+ Add a third new column "Cube Map Array Tex." which is empty for all
+ non-ASTC formats, and checked for all ASTC formats.
+
+ Append to the table caption:
+
+ "The "Block Size" column specifies the compressed block size of the
+ format. Modifying compressed images along aligned block boundaries is
+ possible, as described in this section. The "3D Tex." and "Cube Map
+ Array Tex." columns determine if 3D images composed of compressed 2D
+ slices, and cube map array textures respectively can be specified using
+ CompressedTexImage3D."
+
+ Append to the paragraph at the bottom of p. 168:
+
+ "If <internalformat> is one of the specific ... supports only
+ two-dimensional images. However, if the "3D Tex." column of table 8.19
+ is checked, CompressedTexImage3D will accept a three-dimensional image
+ specified as an array of compressed data consisting of multiple rows of
+ compressed blocks laid out as described in section 8.5."
+
+ Modify the second and third errors in the Errors section for
+ CompressedTexImage[23]D on p. 169, and add a new error:
+
+ "An INVALID_VALUE error is generated by
+
+ * CompressedTexImage2D if <target> is
+ one of the cube map face targets from table 8.21, and
+ * CompressedTexImage3D if <target> is TEXTURE_CUBE_MAP_ARRAY,
+
+ and <width> and <height> are not equal.
+
+ An INVALID_OPERATION error is generated by CompressedTexImage3D if
+ <internalformat> is one of the formats in table 8.19 and <target> is
+ not TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP_ARRAY, or TEXTURE_3D.
+
+ An INVALID_OPERATION error is generated by CompressedTexImage3D if
+ <target> is TEXTURE_CUBE_MAP_ARRAY and the "Cube Map Array"
+ column of table 8.19 is *not* checked, or if <target> is
+ TEXTURE_3D and the "3D Tex." column of table 8.19 is *not* checked"
+
+ Modify the fifth and sixth paragraphs on p. 170:
+
+ "Since these specific compressed formats are easily edited along texel
+ block boundaries, the limitations on subimage location and size are
+ relaxed for CompressedTexSubImage2D and CompressedTexSubImage3D.
+
+ The block width and height varies for different formats, as described in
+ table 8.19. The contents of any block of texels of a compressed texture
+ image in these specific compressed formats that does not intersect the
+ area being modified are preserved during CompressedTexSubImage* calls."
+
+ Modify the second error in the Errors section for
+ CompressedTexSubImage[23]D on p. 170, and add a new error:
+
+ "An INVALID_OPERATION error is generated by CompressedTexSubImage3D if
+ <format> is one of the formats in table 8.19 and <target> is not
+ TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP_ARRAY, or TEXTURE_3D.
+
+ An INVALID_OPERATION error is generated by CompressedTexSubImage3D if
+ <target> is TEXTURE_CUBE_MAP_ARRAY and the "Cube Map Array" column of
+ table 8.19 is *not* checked, or if <target> is TEXTURE_3D and the "3D
+ Tex." column of table 8.19 is *not* checked"
+
+ Modify the final error in the same section, on p. 171:
+
+ "An INVALID_OPERATION error is generated if format is one of the formats
+ in table 8.19 and any of the following conditions occurs. The block
+ width and height refer to the values in the corresponding column of the
+ table.
+
+ * <width> is not a multiple of the format's block width, and <width> +
+ <xoffset> is not equal to the value of TEXTURE_WIDTH.
+ * <height> is not a multiple of the format's block height, and <height>
+ + <yoffset> is not equal to the value of TEXTURE_HEIGHT.
+ * <xoffset> or <yoffset> is not a multiple of the block width or
+ height, respectively."
+
+ Modify table 8.24 (sRGB texture internal formats) to add all of the
+ COMPRESSED_SRGB8_ALPHA8_ASTC_*_KHR formats defined above.
+
+Additions to Appendix C of the OpenGL ES 3.1 Specification (Compressed
+Texture Image Formats)
+
+ Add a new sub-section on ASTC image formats, as follows:
+
+ "C.2 ASTC Compressed Texture Image Formats
+ =========================================
+
+ C.2.1 What is ASTC?
+ ---------------------
+
+ ASTC stands for Adaptive Scalable Texture Compression.
+ The ASTC formats form a family of related compressed texture image
+ formats. They are all derived from a common set of definitions.
+
+ ASTC textures may be encoded using either high or low dynamic range,
+ corresponding to the "HDR profile" and "LDR profile". Support for the
+ HDR profile is indicated by the "GL_KHR_texture_compression_astc_hdr"
+ extension string, and support for the LDR profile is indicated by the
+ "GL_KHR_texture_compression_astc_ldr" extension string.
+
+ The LDR profile supports two-dimensional images for texture targets
+ TEXTURE_2D, TEXTURE_2D_ARRAY, the six texture cube map face targets, and
+ TEXTURE_CUBE_MAP_ARRAY. These images may optionally be specified using
+ the sRGB color space for the RGB channels.
+
+ The HDR profile is a superset of the LDR profile, and also supports
+ texture target TEXTURE_3D for images made up of multiple two-dimensional
+ slices of compressed data. HDR images may be a mix of low and high
+ dynamic range data. If the HDR profile is supported, the LDR profile and
+ its extension string must also be supported.
+
+ ASTC textures may be encoded as 1, 2, 3 or 4 components, but they are
+ all decoded into RGBA.
+
+ Different ASTC formats have different block sizes, specified as part of
+ the name of the format token passed to CompressedTexImage2D and its related
+ functions, and in table 8.19.
+
+ Additional ASTC formats (the "Full profile") exist which support 3D data
+ specified as compressed 3D blocks. However, such formats are not defined
+ by either the LDR or HDR profiles, and are not described in this
+ specification.
+
+ C.2.2 Design Goals
+ --------------------
+
+ The design goals for the format are as follows:
+
+ * Random access. This is a must for any texture compression format.
+ * Bit exact decode. This is a must for conformance testing and
+ reproducibility.
+ * Suitable for mobile use. The format should be suitable for both
+ desktop and mobile GPU environments. It should be low bandwidth
+ and low in area.
+ * Flexible choice of bit rate. Current formats only offer a few bit
+ rates, leaving content developers with only coarse control over
+ the size/quality tradeoff.
+ * Scalable and long-lived. The format should support existing R, RG,
+ RGB and RGBA image types, and also have high "headroom", allowing
+ continuing use for several years and the ability to innovate in
+ encoders. Part of this is the choice to include HDR and 3D.
+ * Feature orthogonality. The choices for the various features of the
+ format are all orthogonal to each other. This has three effects:
+ first, it allows a large, flexible configuration space; second,
+ it makes that space easier to understand; and third, it makes
+ verification easier.
+ * Best in class at given bit rate. It should beat or match the current
+ best in class for peak signal-to-noise ratio (PSNR) at all bit rates.
+ * Fast decode. Texel throughput for a cached texture should be one
+ texel decode per clock cycle per decoder. Parallel decoding of several
+ texels from the same block should be possible at incremental cost.
+ * Low bandwidth. The encoding scheme should ensure that memory access
+ is kept to a minimum, cache reuse is high and memory bandwidth for
+ the format is low.
+ * Low area. It must occupy comparable die size to competing formats.
+
+ C.2.3 Basic Concepts
+ ----------------------
+
+ ASTC is a block-based lossy compression format. The compressed image
+ is divided into a number of blocks of uniform size, which makes it
+ possible to quickly determine which block a given texel resides in.
+
+ Each block has a fixed memory footprint of 128 bits, but these bits
+ can represent varying numbers of texels (the block "footprint").
+
+ Block footprint sizes are not confined to powers-of-two, and are
+ also not confined to be square. They may be 2D, in which case the
+ block dimensions range from 4 to 12 texels, or 3D, in which case
+ the block dimensions range from 3 to 6 texels.
+
+ Decoding one texel requires only the data from a single block. This
+ simplifies cache design, reduces bandwidth and improves encoder throughput.
+
+ C.2.4 Block Encoding
+ ----------------------
+
+ To understand how the blocks are stored and decoded, it is useful to start
+ with a simple example, and then introduce additional features.
+
+ The simplest block encoding starts by defining two color "endpoints". The
+ endpoints define two colors, and a number of additional colors are generated
+ by interpolating between them. We can define these colors using 1, 2, 3,
+ or 4 components (usually corresponding to R, RG, RGB and RGBA textures),
+ and using low or high dynamic range.
+
+ We then store a color interpolant weight for each texel in the image, which
+ specifies how to calculate the color to use. From this, a weighted average
+ of the two endpoint colors is used to generate the intermediate color,
+ which is the returned color for this texel.
+
+ There are several different ways of specifying the endpoint colors, and the
+ weights, but once they have been defined, calculation of the texel colors
+ proceeds identically for all of them. Each block is free to choose whichever
+ encoding scheme best represents its color endpoints, within the constraint
+ that all the data fits within the 128 bit block.
+
+ For blocks which have a large number of texels (e.g. a 12x12 block), there is
+ not enough space to explicitly store a weight for every texel. In this case,
+ a sparser grid with fewer weights is stored, and interpolation is used to
+ determine the effective weight to be used for each texel position. This allows
+ very low bit rates to be used with acceptable quality. This can also be used
+ to more efficiently encode blocks with low detail, or with strong vertical
+ or horizontal features.
+
+ For blocks which have a mixture of disparate colors, a single line in the
+ color space is not a good fit to the colors of the pixels in the original
+ image. It is therefore possible to partition the texels into multiple sets,
+ the pixels within each set having similar colors. For each of these
+ "partitions", we specify separate endpoint pairs, and choose which pair of
+ endpoints to use for a particular texel by looking up the partition index
+ from a partitioning pattern table. In ASTC, this partition table is actually
+ implemented as a function.
+
+ The endpoint encoding for each partition is independent.
+
+ For blocks which have uncorrelated channels - for example an image with a
+ transparency mask, or an image used as a normal map - it may be necessary
+ to specify two weights for each texel. Interpolation between the components
+ of the endpoint colors can then proceed independently for each "plane" of
+ the image. The assignment of channels to planes is selectable.
+
+ Since each of the above options is independent, it is possible to specify any
+ combination of channels, endpoint color encoding, weight encoding,
+ interpolation, multiple partitions and single or dual planes.
+
+ Since these values are specified per block, it is important that they are
+ represented with the minimum possible number of bits. As a result, these
+ values are packed together in ways which can be difficult to read, but
+ which are nevertheless highly amenable to hardware decode.
+
+ All of the values used as weights and color endpoint values can be specified
+ with a variable number of bits. The encoding scheme used allows a fine-
+ grained tradeoff between weight bits and color endpoint bits using "integer
+ sequence encoding". This can pack adjacent values together, allowing us to
+ use fractional numbers of bits per value.
+
+ Finally, a block may be just a single color. This is a so-called "void
+ extent block" and has a special coding which also allows it to identify
+ nearby regions of single color. This may be used to short-circuit fetching of
+ what would be identical blocks, and further reduce memory bandwidth.
+
+ C.2.5 LDR and HDR Modes
+ -------------------------
+
+ The decoding process for LDR content can be simplified if it is known in
+ advance that sRGB output is required. This selection is therefore included
+ as part of the global configuration.
+
+ The two modes differ in various ways.
+
+ -----------------------------------------------------------------------------
+ Operation            LDR Mode                      HDR Mode
+ -----------------------------------------------------------------------------
+ Returned value       Vector of FP16 values,        Vector of FP16 values
+                      or Vector of UNORM8 values.
+
+ sRGB compatible      Yes                           No
+
+ LDR endpoint         16 bits, or                   16 bits
+ decoding precision   8 bits for sRGB
+
+ HDR endpoint mode    Error color                   As decoded
+ results
+
+ Error results        Error color                   Vector of NaNs (0xFFFF)
+ -----------------------------------------------------------------------------
+ Table C.2.1 - Differences Between LDR and HDR Modes
+
+ The error color is opaque fully-saturated magenta
+ (R,G,B,A = 0xFF, 0x00, 0xFF, 0xFF). This has been chosen as it is much more
+ noticeable than black or white, and occurs far less often in valid images.
+
+ For linear RGB decode, the error color may be either opaque fully-saturated
+ magenta (R,G,B,A = 1.0, 0.0, 1.0, 1.0) or a vector of four NaNs
+ (R,G,B,A = NaN, NaN, NaN, NaN). In the latter case, the recommended NaN
+ value returned is 0xFFFF.
+
+ The error color is returned as an informative response to invalid
+ conditions, including invalid block encodings or use of reserved endpoint
+ modes.
+
+ Future, forward-compatible extensions to KHR_texture_compression_astc
+ may define valid interpretations of these conditions, which will decode to
+ some other color. Therefore, encoders and applications must not rely on
+ invalid encodings as a way of generating the error color.
+
+ C.2.6 Configuration Summary
+ -----------------------------
+
+ The global configuration data for the format is as follows:
+
+ * Block dimension (always 2D for both LDR and HDR profiles)
+ * Block footprint size
+ * sRGB output enabled or not
+
+ The data specified per block is as follows:
+
+ * Texel weight grid size
+ * Texel weight range
+ * Texel weight values
+ * Number of partitions
+ * Partition pattern index
+ * Color endpoint modes (includes LDR or HDR selection)
+ * Color endpoint data
+ * Number of planes
+ * Plane-to-channel assignment
+
+ C.2.7 Decode Procedure
+ ------------------------
+
+ To decode one texel:
+
+ Find block containing texel
+ Read block mode
+ If void-extent block, store void extent and immediately return single
+ color (optimization)
+
+ For each plane in image
+ If block mode requires infill
+ Find and decode stored weights adjacent to texel, unquantize and
+ interpolate
+ Else
+ Find and decode weight for texel, and unquantize
+
+ Read number of partitions
+ If number of partitions > 1
+ Read partition table pattern index
+ Look up partition number from pattern
+
+ Read color endpoint mode and endpoint data for selected partition
+ Unquantize color endpoints
+ Interpolate color endpoints using weight (or weights in dual-plane mode)
+ Return interpolated color
+
+ C.2.8 Block Determination and Bit Rates
+ The block footprint is a global setting for any given texture, and is
+ therefore not encoded in the individual blocks.
+
+ For 2D textures, the block footprint's width and height are selectable
+ from a number of predefined sizes, namely 4, 5, 6, 8, 10 and 12 pixels.
+
+ For square and nearly-square blocks, this gives the following bit rates:
+
+ -------------------------------------
+ Footprint
+ Width Height Bit Rate Increment
+ -------------------------------------
+ 4 4 8.00 125%
+ 5 4 6.40 125%
+ 5 5 5.12 120%
+ 6 5 4.27 120%
+ 6 6 3.56 114%
+ 8 5 3.20 120%
+ 8 6 2.67 105%
+ 10 5 2.56 120%
+ 10 6 2.13 107%
+ 8 8 2.00 125%
+ 10 8 1.60 125%
+ 10 10 1.28 120%
+ 12 10 1.07 120%
+ 12 12 0.89
+ -------------------------------------
+ Table C.2.2 - 2D Footprint and Bit Rates
+
+ The block footprint is shown as <width>x<height> in the format name. For
+ example, the format COMPRESSED_RGBA_ASTC_8x6_KHR specifies an image with
+ a block width of 8 texels, and a block height of 6 texels.
+
+ The "Increment" column indicates the ratio of bit rate against the next
+ lower available rate. A consistent value in this column indicates an even
+ spread of bit rates.
+
+ The HDR profile supports only those block footprints listed in Table
+ C.2.2. Other block sizes are not supported.
+
+ For images which are not an integer multiple of the block size, additional
+ texels are added to the edges with maximum X and Y. These texels may be
+ any color, as they will not be accessed.
+
+ Although these are not all powers of two, it is possible to calculate block
+ addresses and pixel addresses within the block, for legal image sizes,
+ without undue complexity.
+
+ Given a 2D image which is W x H pixels in size, with block size
+ w x h, the size of the image in blocks is:
+
+ Bw = ceiling(W/w)
+ Bh = ceiling(H/h)
+
+ For a 3D image, each 2D slice is a single texel thick, so that for an
+ image which is W x H x D pixels in size, with block size w x h, the size
+ of the image in blocks is:
+
+ Bw = ceiling(W/w)
+ Bh = ceiling(H/h)
+ Bd = D
+
+ C.2.9 Block Layout
+ --------------------
+
+ Each block in the image is stored as a single 128-bit block in memory. These
+ blocks are laid out in raster order, starting with the block at (0,0,0), then
+ ordered sequentially by X, Y and finally Z (if present). They are aligned to
+ 128-bit boundaries in memory.
+
+ The bits in the block are labeled in little-endian order - the byte at the
+ lowest address contains bits 0..7. Bit 0 is the least significant bit in the
+ byte.
+
+ Each block has the same basic layout, as shown in figure C.1.
+
+ 127 126 125 124 123 122 121 120 119 118 117 116 115 114 113 112
+ --------------------------------------------------------------
+ | Texel Weight Data (variable width) Fill direction ->
+ --------------------------------------------------------------
+
+ 111 110 109 108 107 106 105 104 103 102 101 100 99 98 97 96
+ --------------------------------------------------------------
+ Texel Weight Data
+ --------------------------------------------------------------
+
+ 95 94 93 92 91 90 89 88 87 86 85 84 83 82 81 80
+ --------------------------------------------------------------
+ Texel Weight Data
+ --------------------------------------------------------------
+
+ 79 78 77 76 75 74 73 72 71 70 69 68 67 66 65 64
+ --------------------------------------------------------------
+ Texel Weight Data
+ --------------------------------------------------------------
+
+ 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
+ --------------------------------------------------------------
+ : More config data :
+ --------------------------------------------------------------
+
+ 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
+ --------------------------------------------------------------
+ <-Fill direction Color Endpoint Data
+ --------------------------------------------------------------
+
+ 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+ --------------------------------------------------------------
+ : Extra configuration data
+ --------------------------------------------------------------
+
+ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ --------------------------------------------------------------
+ Extra | Part | Block mode |
+ --------------------------------------------------------------
+
+ Figure C.1 - Block Layout Overview
+
+ Dotted partition lines indicate that the split position is not fixed.
+
+ The "Block mode" field specifies how the Texel Weight Data is encoded.
+
+ The "Part" field specifies the number of partitions, minus one. If dual
+ plane mode is enabled, the number of partitions must be 3 or fewer.
+ If 4 partitions are specified, the error value is returned for all
+ texels in the block.
+
+ The size and layout of the extra configuration data depends on the
+ number of partitions, and the number of planes in the image, as shown in
+ figures C.2 and C.3 (only the bottom 32 bits are shown):
+
+ 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+ --------------------------------------------------------------
+ <- Color endpoint data |CEM
+ --------------------------------------------------------------
+
+ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ --------------------------------------------------------------
+ CEM | 0 0 | Block Mode |
+ --------------------------------------------------------------
+
+ Figure C.2 - Single-partition Block Layout
+
+ CEM is the color endpoint mode field, which determines how the Color
+ Endpoint Data is encoded.
+
+ If dual-plane mode is active, the color component selector bits appear
+ directly below the weight bits.
+
+ 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+ --------------------------------------------------------------
+ | CEM | Partition Index
+ --------------------------------------------------------------
+
+ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ --------------------------------------------------------------
+ Partition Index | Block Mode |
+ --------------------------------------------------------------
+
+ Figure C.3 - Multi-partition Block Layout
+
+ The Partition Index field specifies which partition layout to use. CEM is
+ the first 6 bits of color endpoint mode information for the various
+ partitions. For modes which require more than 6 bits of CEM data, the
+ additional bits appear at a variable position directly beneath the texel
+ weight data.
+
+ If dual-plane mode is active, the color component selector bits then appear
+ directly below the additional CEM bits.
+
+ The final special case is that if bits [8:0] of the block are "111111100",
+ then the block is a void-extent block, which has a separate encoding
+ described in section C.2.23.
+
+ C.2.10 Block Mode
+ ------------------
+
+ The Block Mode field specifies the width, height and depth of the grid of
+ weights, what range of values they use, and whether dual weight planes are
+ present. Since some of these are not represented using powers of two (there
+ are 12 possible weight widths, for example), and not all combinations are
+ allowed, this is not a simple bit packing. However, it can be unpacked
+ quickly in hardware.
+
+ The weight ranges are encoded using a 3 bit value R, which is interpreted
+ together with a precision bit H, as follows:
+
+ Low Precision Range (H=0) High Precision Range (H=1)
+ R Weight Range Trits Quints Bits Weight Range Trits Quints Bits
+ -------------------------------------------------------------------------
+ 000 Invalid Invalid
+ 001 Invalid Invalid
+ 010 0..1 1 0..9 1 1
+ 011 0..2 1 0..11 1 2
+ 100 0..3 2 0..15 4
+ 101 0..4 1 0..19 1 2
+ 110 0..5 1 1 0..23 1 3
+ 111 0..7 3 0..31 5
+ -------------------------------------------------------------------------
+ Table C.2.7 - Weight Range Encodings
+
+ Each weight value is encoded using the specified number of Trits, Quints
+ and Bits. The details of this encoding can be found in Section C.2.12 -
+ Integer Sequence Encoding.
+
+ For 2D blocks, the Block Mode field is laid out as follows:
+
+ -------------------------------------------------------------------------
+ 10 9 8 7 6 5 4 3 2 1 0 Width Height Notes
+ -------------------------------------------------------------------------
+ D H B A R0 0 0 R2 R1 B+4 A+2
+ D H B A R0 0 1 R2 R1 B+8 A+2
+ D H B A R0 1 0 R2 R1 A+2 B+8
+ D H 0 B A R0 1 1 R2 R1 A+2 B+6
+ D H 1 B A R0 1 1 R2 R1 B+2 A+2
+ D H 0 0 A R0 R2 R1 0 0 12 A+2
+ D H 0 1 A R0 R2 R1 0 0 A+2 12
+ D H 1 1 0 0 R0 R2 R1 0 0 6 10
+ D H 1 1 0 1 R0 R2 R1 0 0 10 6
+ B 1 0 A R0 R2 R1 0 0 A+6 B+6 D=0, H=0
+ x x 1 1 1 1 1 1 1 0 0 - - Void-extent
+ x x 1 1 1 x x x x 0 0 - - Reserved*
+ x x x x x x x 0 0 0 0 - - Reserved
+ -------------------------------------------------------------------------
+ Table C.2.8 - 2D Block Mode Layout
+
+ Note that, due to the encoding of the R field, as described in the
+ previous page, bits R2 and R1 cannot both be zero, which disambiguates
+ the first five rows from the rest of the table.
+
+ Bit positions with a value of x are ignored for purposes of determining
+ if a block is a void-extent block or reserved, but may have defined
+ encodings for specific void-extent blocks.
+
+ The penultimate row of the table is reserved only if bits [5:2] are not
+ all 1; if they are all 1, the block is instead a void-extent block (as
+ shown in the previous row).
+
+ The D bit is set to indicate dual-plane mode. In this mode, the maximum
+ allowed number of partitions is 3.
+
+ The penultimate row of the table is reserved only if bits [5:2] are not
+ all 1; if they are all 1, the block is instead a void-extent block (as
+ shown in the previous row).
+
+ The size of the grid in each dimension must be less than or equal to
+ the corresponding dimension of the block footprint. If the grid size
+ is greater than the footprint dimension in any axis, then this is an
+ illegal block encoding and all texels will decode to the error color.
+
+ C.2.11 Color Endpoint Mode
+ ---------------------------
+
+ In single-partition mode, the Color Endpoint Mode (CEM) field stores one
+ of 16 possible values. Each of these specifies how many raw data values
+ are encoded, and how to convert these raw values into two RGBA color
+ endpoints. They can be summarized as follows:
+
+ ---------------------------------------------
+ CEM Description Class
+ ---------------------------------------------
+ 0 LDR Luminance, direct 0
+ 1 LDR Luminance, base+offset 0
+ 2 HDR Luminance, large range 0
+ 3 HDR Luminance, small range 0
+ 4 LDR Luminance+Alpha, direct 1
+ 5 LDR Luminance+Alpha, base+offset 1
+ 6 LDR RGB, base+scale 1
+ 7 HDR RGB, base+scale 1
+ 8 LDR RGB, direct 2
+ 9 LDR RGB, base+offset 2
+ 10 LDR RGB, base+scale plus two A 2
+ 11 HDR RGB, direct 2
+ 12 LDR RGBA, direct 3
+ 13 LDR RGBA, base+offset 3
+ 14 HDR RGB, direct + LDR Alpha 3
+ 15 HDR RGB, direct + HDR Alpha 3
+ ---------------------------------------------
+ Table C.2.10 - Color Endpoint Modes.
+ [[ If the HDR profile is not implemented, remove from table C.2.10
+ all rows whose description starts with "HDR", and add to the
+ caption: ]]
+ Modes not described in the CEM column are reserved for HDR modes, and
+ will generate errors in an unextended OpenGL ES implementation.
+
+ In multi-partition mode, the CEM field is of variable width, from 6 to 14
+ bits. The lowest 2 bits of the CEM field specify how the endpoint mode
+ for each partition is calculated:
+
+ ----------------------------------------------------
+ Value Meaning
+ ----------------------------------------------------
+ 00 All color endpoint pairs are of the same type.
+ A full 4-bit CEM is stored in block bits [28:25]
+ and is used for all partitions.
+ 01 All endpoint pairs are of class 0 or 1.
+ 10 All endpoint pairs are of class 1 or 2.
+ 11 All endpoint pairs are of class 2 or 3.
+ ----------------------------------------------------
+ Table C.2.11 - Multi-Partition Color Endpoint Modes
+
+ If the CEM selector value in bits [24:23] is not 00,
+ then data layout is as follows:
+
+ ---------------------------------------------------
+ Part n m l k j i h g
+ ------------------------------------------
+ 2 ... Weight : M1 : ...
+ ------------------------------------------
+ 3 ... Weight : M2 : M1 :M0 : ...
+ ------------------------------------------
+ 4 ... Weight : M3 : M2 : M1 : M0 : ...
+ ------------------------------------------
+
+ Part 28 27 26 25 24 23
+ ----------------------
+ 2 | M0 |C1 |C0 | CEM |
+ ----------------------
+ 3 |M0 |C2 |C1 |C0 | CEM |
+ ----------------------
+ 4 |C3 |C2 |C1 |C0 | CEM |
+ ----------------------
+ ---------------------------------------------------
+ Figure C.4 - Multi-Partition Color Endpoint Modes
+
+ In this view, each partition i has two fields. C<i> is the class
+ selector bit, choosing between the two possible CEM classes (0 indicates
+ the lower of the two classes), and M<i> is a two-bit field specifying
+ the low bits of the color endpoint mode within that class. The
+ additional bits appear at a variable bit position, immediately below the
+ texel weight data.
+
+ The ranges used for the data values are not explicitly specified.
+ Instead, they are derived from the number of available bits remaining
+ after the configuration data and weight data have been specified.
+
+ Details of the decoding procedure for Color Endpoints can be found in
+ section C.2.13.
+
+ C.2.12 Integer Sequence Encoding
+ ---------------------------------
+
+ Both the weight data and the endpoint color data are variable width, and
+ are specified using a sequence of integer values. The range of each
+ value in a sequence (e.g. a color weight) is constrained.
+
+ Since it is often the case that the most efficient range for these
+ values is not a power of two, each value sequence is encoded using a
+ technique known as "integer sequence encoding". This allows efficient,
+ hardware-friendly packing and unpacking of values with non-power-of-two
+ ranges.
+
+ In a sequence, each value has an identical range. The range is specified
+ in one of the following forms:
+
+ Value range MSB encoding LSB encoding Value Block Packed
+ block size
+ ----------- ------------ ------------ ----------- ----- ----------
+ 0 .. 2^n-1 - n bit value m 1 n
+ m (n <= 8)
+ 0 .. (3 * 2^n)-1 Base-3 "trit" n bit value t * 2^n + m 5 8 + 5*n
+ value t m (n <= 6)
+ 0 .. (5 * 2^n)-1 Base-5 "quint" n bit value q * 2^n + m 3 7 + 3*n
+ value q m (n <= 5)
+ -------------------------------------------
+ Table C.2.13 - Encoding for Different Ranges
+
+ Since 3^5 is 243, it is possible to pack five trits into 8 bits (which has
+ 256 possible values), so a trit can effectively be encoded as 1.6 bits.
+ Similarly, since 5^3 is 125, it is possible to pack three quints into
+ 7 bits (which has 128 possible values), so a quint can be encoded as
+ 2.33 bits.
+
+ The encoding scheme packs the trits or quints, and then interleaves the n
+ additional bits in positions that satisfy the requirements of an
+ arbitrary length stream. This makes it possible to correctly specify
+ lists of values whose length is not an integer multiple of 3 or 5 values.
+ It also makes it possible to easily select a value at random within the stream.
+
+ If there are insufficient bits in the stream to fill the final block, then
+ unused (higher order) bits are assumed to be 0 when decoding.
+
+ To decode the bits for value number i in a sequence of bits b, both
+ indexed from 0, perform the following:
+
+ If the range is encoded as n bits per value, then the value is bits
+ b[i*n+n-1:i*n] - a simple multiplexing operation.
+
+ If the range is encoded using a trit, then each block contains 5 values
+ (v0 to v4), each of which contains a trit (t0 to t4) and a corresponding
+ LSB value (m0 to m4). The first bit of the packed block is bit
+ floor(i/5)*(8+5*n). The bits in the block are packed as follows
+ (in this example, n is 4):
+
+ 27 26 25 24 23 22 21 20 19 18 17 16
+ -----------------------------------------------
+ |T7 | m4 |T6 T5 | m3 |T4 |
+ -----------------------------------------------
+
+ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ --------------------------------------------------------------
+ | m2 |T3 T2 | m1 |T1 T0 | m0 |
+ --------------------------------------------------------------
+
+ Figure C.5 - Trit-based Packing
+
+ The five trits t0 to t4 are obtained by bit manipulations of the 8 bits
+ T[7:0] as follows:
+
+ if T[4:2] = 111
+ C = { T[7:5], T[1:0] }; t4 = t3 = 2
+ else
+ C = T[4:0]
+ if T[6:5] = 11
+ t4 = 2; t3 = T[7]
+ else
+ t4 = T[7]; t3 = T[6:5]
+
+ if C[1:0] = 11
+ t2 = 2; t1 = C[4]; t0 = { C[3], C[2]&~C[3] }
+ else if C[3:2] = 11
+ t2 = 2; t1 = 2; t0 = C[1:0]
+ else
+ t2 = C[4]; t1 = C[3:2]; t0 = { C[1], C[0]&~C[1] }
+
+ If the range is encoded using a quint, then each block contains 3 values
+ (v0 to v2), each of which contains a quint (q0 to q2) and a corresponding
+ LSB value (m0 to m2). The first bit of the packed block is bit
+ floor(i/3)*(7+3*n).
+
+ The bits in the block are packed as follows (in this example, n is 4):
+
+ 18 17 16
+ -----------
+ |Q6 Q5 | m2
+ -----------
+ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ ---------------------------------------------------------------
+ m2 |Q4 Q3 | m1 |Q2 Q1 Q0 | m0 |
+ ---------------------------------------------------------------
+
+ Figure C.6 - Quint-based Packing
+
+ The three quints q0 to q2 are obtained by bit manipulations of the 7 bits
+ Q[6:0] as follows:
+
+ if Q[2:1] = 11 and Q[6:5] = 00
+ q2 = { Q[0], Q[4]&~Q[0], Q[3]&~Q[0] }; q1 = q0 = 4
+ else
+ if Q[2:1] = 11
+ q2 = 4; C = { Q[4:3], ~Q[6:5], Q[0] }
+ else
+ q2 = Q[6:5]; C = Q[4:0]
+
+ if C[2:0] = 101
+ q1 = 4; q0 = C[4:3]
+ else
+ q1 = C[4:3]; q0 = C[2:0]
+
+ Both these procedures ensure a valid decoding for all possible packed values
+ (256 for trits, 128 for quints), even though a few are duplicates. They can
+ also be implemented efficiently in software using small tables.
+
+ Encoding methods are not specified here, although table-based mechanisms
+ work well.
+
+ C.2.13 Endpoint Unquantization
+ -------------------------------
+
+ Each color endpoint is specified as a sequence of integers in a given
+ range. These values are packed using integer sequence encoding, as a
+ stream of bits stored from just above the configuration data, and
+ growing upwards.
+
+ Once unpacked, the values must be unquantized from their storage range,
+ returning them to a standard range of 0..255.
+
+ For bit-only representations, this is simple bit replication from the
+ most significant bit of the value.
+
+ For trit or quint-based representations, this involves a set of bit
+ manipulations and adjustments to avoid the expense of full-width
+ multipliers. This procedure ensures correct scaling, but scrambles
+ the order of the decoded values relative to the encoded values.
+ This must be compensated for using a table in the encoder.
+
+ The initial inputs to the procedure are denoted A (9 bits), B (9 bits),
+ C (9 bits) and D (3 bits) and are decoded using the range as follows:
+
+ ---------------------------------------------------------------
+ Range T Q B Bits A B C D
+ ---------------------------------------------------------------
+ 0..5 1 1 a aaaaaaaaa 000000000 204 Trit value
+ 0..9 1 1 a aaaaaaaaa 000000000 113 Quint value
+ 0..11 1 2 ba aaaaaaaaa b000b0bb0 93 Trit value
+ 0..19 1 2 ba aaaaaaaaa b0000bb00 54 Quint value
+ 0..23 1 3 cba aaaaaaaaa cb000cbcb 44 Trit value
+ 0..39 1 3 cba aaaaaaaaa cb0000cbc 26 Quint value
+ 0..47 1 4 dcba aaaaaaaaa dcb000dcb 22 Trit value
+ 0..79 1 4 dcba aaaaaaaaa dcb0000dc 13 Quint value
+ 0..95 1 5 edcba aaaaaaaaa edcb000ed 11 Trit value
+ 0..159 1 5 edcba aaaaaaaaa edcb0000e 6 Quint value
+ 0..191 1 6 fedcba aaaaaaaaa fedcb000f 5 Trit value
+ ---------------------------------------------------------------
+ Table C.2.16 - Color Unquantization Parameters
+
+ These are then processed as follows:
+
+ T = D * C + B;
+ T = T ^ A;
+ T = (A & 0x80) | (T >> 2);
+
+ Note that the multiply in the first line is nearly trivial as it only
+ needs to multiply by 0, 1, 2, 3 or 4.
+
+ C.2.14 LDR Endpoint Decoding
+ -----------------------------
+ The decoding method used depends on the Color Endpoint Mode (CEM) field,
+ which specifies how many values are used to represent the endpoint.
+
+ The CEM field also specifies how to take the n unquantized color endpoint
+ values v0 to v[n-1] and convert them into two RGBA color endpoints e0
+ and e1.
+
+ The HDR Modes are more complex and do not fit neatly into this section.
+ They are documented in the following section.
+
+ The methods can be summarized as follows.
+
+ -------------------------------------------------
+ CEM Range Description n
+ -------------------------------------------------
+ 0 LDR Luminance, direct 2
+ 1 LDR Luminance, base+offset 2
+ 2 HDR Luminance, large range 2
+ 3 HDR Luminance, small range 2
+ 4 LDR Luminance+Alpha, direct 4
+ 5 LDR Luminance+Alpha, base+offset 4
+ 6 LDR RGB, base+scale 4
+ 7 HDR RGB, base+scale 4
+ 8 LDR RGB, direct 6
+ 9 LDR RGB, base+offset 6
+ 10 LDR RGB, base+scale plus two A 6
+ 11 HDR RGB 6
+ 12 LDR RGBA, direct 8
+ 13 LDR RGBA, base+offset 8
+ 14 HDR RGB + LDR Alpha 8
+ 15 HDR RGB + HDR Alpha 8
+ -------------------------------------------------
+ Table C.2.17 - Color Endpoint Modes
+ [[ If the HDR profile is not implemented, remove from table C.2.17
+ all rows whose description starts with "HDR", and add to the
+ caption: ]]
+ Modes not described are reserved, as described in table C.2.10.
+
+ [[ HDR profile only ]]
+ Mode 14 is special in that the alpha values are interpolated linearly,
+ but the color components are interpolated logarithmically. This is the
+ only endpoint format with mixed-mode operation, and will return the
+ error value if encountered in LDR mode.
+
+ Decode the different LDR endpoint modes as follows:
+
+ Mode 0 LDR Luminance, direct
+
+ e0=(v0,v0,v0,0xFF); e1=(v1,v1,v1,0xFF);
+
+ Mode 1 LDR Luminance, base+offset
+
+ L0 = (v0>>2)|(v1&0xC0); L1=L0+(v1&0x3F);
+ if (L1>0xFF) { L1=0xFF; }
+ e0=(L0,L0,L0,0xFF); e1=(L1,L1,L1,0xFF);
+
+ Mode 4 LDR Luminance+Alpha,direct
+
+ e0=(v0,v0,v0,v2);
+ e1=(v1,v1,v1,v3);
+
+ Mode 5 LDR Luminance+Alpha, base+offset
+
+ bit_transfer_signed(v1,v0); bit_transfer_signed(v3,v2);
+ e0=(v0,v0,v0,v2); e1=(v0+v1,v0+v1,v0+v1,v2+v3);
+ clamp_unorm8(e0); clamp_unorm8(e1);
+
+ Mode 6 LDR RGB, base+scale
+
+ e0=(v0*v3>>8,v1*v3>>8,v2*v3>>8, 0xFF);
+ e1=(v0,v1,v2,0xFF);
+
+ Mode 8 LDR RGB, Direct
+
+ s0= v0+v2+v4; s1= v1+v3+v5;
+ if (s1>=s0){e0=(v0,v2,v4,0xFF);
+ e1=(v1,v3,v5,0xFF); }
+ else { e0=blue_contract(v1,v3,v5,0xFF);
+ e1=blue_contract(v0,v2,v4,0xFF); }
+
+ Mode 9 LDR RGB, base+offset
+
+ bit_transfer_signed(v1,v0);
+ bit_transfer_signed(v3,v2);
+ bit_transfer_signed(v5,v4);
+ if(v1+v3+v5 >= 0)
+ { e0=(v0,v2,v4,0xFF); e1=(v0+v1,v2+v3,v4+v5,0xFF); }
+ else
+ { e0=blue_contract(v0+v1,v2+v3,v4+v5,0xFF);
+ e1=blue_contract(v0,v2,v4,0xFF); }
+ clamp_unorm8(e0); clamp_unorm8(e1);
+
+ Mode 10 LDR RGB, base+scale plus two A
+
+ e0=(v0*v3>>8,v1*v3>>8,v2*v3>>8, v4);
+ e1=(v0,v1,v2, v5);
+
+ Mode 12 LDR RGBA, direct
+
+ s0= v0+v2+v4; s1= v1+v3+v5;
+ if (s1>=s0){e0=(v0,v2,v4,v6);
+ e1=(v1,v3,v5,v7); }
+ else { e0=blue_contract(v1,v3,v5,v7);
+ e1=blue_contract(v0,v2,v4,v6); }
+
+ Mode 13 LDR RGBA, base+offset
+
+ bit_transfer_signed(v1,v0);
+ bit_transfer_signed(v3,v2);
+ bit_transfer_signed(v5,v4);
+ bit_transfer_signed(v7,v6);
+ if(v1+v3+v5>=0) { e0=(v0,v2,v4,v6);
+ e1=(v0+v1,v2+v3,v4+v5,v6+v7); }
+ else { e0=blue_contract(v0+v1,v2+v3,v4+v5,v6+v7);
+ e1=blue_contract(v0,v2,v4,v6); }
+ clamp_unorm8(e0); clamp_unorm8(e1);
+
+ The bit_transfer_signed procedure transfers a bit from one value (a)
+ to another (b). Initially, both a and b are in the range 0..255.
+ After calling this procedure, a's range becomes -32..31, and b remains
+ in the range 0..255. Note that, as is often the case, this is easier to
+ express in hardware than in C:
+
+ bit_transfer_signed(int& a, int& b)
+ {
+ b >>= 1;
+ b |= a & 0x80;
+ a >>= 1;
+ a &= 0x3F;
+ if( (a&0x20)!=0 ) a-=0x40;
+ }
+
+ The blue_contract procedure is used to give additional precision to
+ RGB colors near grey:
+
+ color blue_contract( int r, int g, int b, int a )
+ {
+ color c;
+ c.r = (r+b) >> 1;
+ c.g = (g+b) >> 1;
+ c.b = b;
+ c.a = a;
+ return c;
+ }
+
+ The clamp_unorm8 procedure is used to clamp a color into the UNORM8 range:
+
+ void clamp_unorm8(color c)
+ {
+ if(c.r < 0) {c.r=0;} else if(c.r > 255) {c.r=255;}
+ if(c.g < 0) {c.g=0;} else if(c.g > 255) {c.g=255;}
+ if(c.b < 0) {c.b=0;} else if(c.b > 255) {c.b=255;}
+ if(c.a < 0) {c.a=0;} else if(c.a > 255) {c.a=255;}
+ }
+
+ [[ If the HDR profile is not implemented, do not include section
+ C.2.15 ]]
+
+ C.2.15 HDR Endpoint Decoding
+ -------------------------
+
+ For HDR endpoint modes, color values are represented in a 12-bit
+ pseudo-logarithmic representation.
+
+ HDR Endpoint Mode 2
+
+ Mode 2 represents luminance-only data with a large range. It encodes
+ using two values (v0, v1). The complete decoding procedure is as follows:
+
+ if(v1 >= v0)
+ {
+ y0 = (v0 << 4);
+ y1 = (v1 << 4);
+ }
+ else
+ {
+ y0 = (v1 << 4) + 8;
+ y1 = (v0 << 4) - 8;
+ }
+ // Construct RGBA result (0x780 is 1.0f)
+ e0 = (y0, y0, y0, 0x780);
+ e1 = (y1, y1, y1, 0x780);
+
+ HDR Endpoint Mode 3
+
+ Mode 3 represents luminance-only data with a small range. It packs the
+ bits for a base luminance value, together with an offset, into two values
+ (v0, v1):
+
+ Value 7 6 5 4 3 2 1 0
+ ----- ------------------------------
+ v0 |M | L[6:0] |
+ ------------------------------
+ v1 | X[3:0] | d[3:0] |
+ ------------------------------
+
+ Table C.2.18 - HDR Mode 3 Value Layout
+
+ The bit field marked as X allocates different bits to L or d depending
+ on the value of the mode bit M.
+
+ The complete decoding procedure is as follows:
+
+ // Check mode bit and extract.
+ if((v0&0x80) !=0)
+ {
+ y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
+ d = (v1 & 0x1F) << 2;
+ }
+ else
+ {
+ y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
+ d = (v1 & 0x0F) << 1;
+ }
+
+ // Add delta and clamp
+ y1 = y0 + d;
+ if(y1 > 0xFFF) { y1 = 0xFFF; }
+
+ // Construct RGBA result (0x780 is 1.0f)
+ e0 = (y0, y0, y0, 0x780);
+ e1 = (y1, y1, y1, 0x780);
+
+ HDR Endpoint Mode 7
+
+ Mode 7 packs the bits for a base RGB value, a scale factor, and some
+ mode bits into the four values (v0, v1, v2, v3):
+
+ Value 7 6 5 4 3 2 1 0
+ ----- ------------------------------
+ v0 |M[3:2] | R[5:0] |
+ ----- ------------------------------
+ v1 |M1 |X0 |X1 | G[4:0] |
+ ----- ------------------------------
+ v2 |M0 |X2 |X3 | B[4:0] |
+ ----- ------------------------------
+ v3 |X4 |X5 |X6 | S[4:0] |
+ ----- ------------------------------
+ Table C.2.19 - HDR Mode 7 Value Layout
+
+ The mode bits M0 to M3 are a packed representation of an endpoint bit
+ mode, together with the major component index. For modes 0 to 4, the
+ component (red, green, or blue) with the largest magnitude is identified,
+ and the values swizzled to ensure that it is decoded from the red channel.
+
+ The endpoint bit mode is used to determine the number of bits assigned
+ to each component of the endpoint, and the destination of each of the
+ extra bits X0 to X6, as follows:
+
+ ------------------------------------------------------
+ Number of bits Destination of extra bits
+ Mode R G B S X0 X1 X2 X3 X4 X5 X6
+ ------------------------------------------------------
+ 0 11 5 5 7 R9 R8 R7 R10 R6 S6 S5
+ 1 11 6 6 5 R8 G5 R7 B5 R6 R10 R9
+ 2 10 5 5 8 R9 R8 R7 R6 S7 S6 S5
+ 3 9 6 6 7 R8 G5 R7 B5 R6 S6 S5
+ 4 8 7 7 6 G6 G5 B6 B5 R6 R7 S5
+ 5 7 7 7 7 G6 G5 B6 B5 R6 S6 S5
+ ------------------------------------------------------
+ Table C.2.20 - Endpoint Bit Mode
+
+ As noted before, this appears complex when expressed in C, but much
+ easier to achieve in hardware - bit masking, extraction, shifting
+ and assignment usually ends up as a single wire or multiplexer.
+
+ The complete decoding procedure is as follows:
+
+ // Extract mode bits and unpack to major component and mode.
+ int modeval = ((v0&0xC0)>>6) | ((v1&0x80)>>5) | ((v2&0x80)>>4);
+
+ int majcomp;
+ int mode;
+
+ if( (modeval & 0xC ) != 0xC )
+ {
+ majcomp = modeval >> 2; mode = modeval & 3;
+ }
+ else if( modeval != 0xF )
+ {
+ majcomp = modeval & 3; mode = 4;
+ }
+ else
+ {
+ majcomp = 0; mode = 5;
+ }
+
+ // Extract low-order bits of r, g, b, and s.
+ int red = v0 & 0x3f;
+ int green = v1 & 0x1f;
+ int blue = v2 & 0x1f;
+ int scale = v3 & 0x1f;
+
+ // Extract high-order bits, which may be assigned depending on mode
+ int x0 = (v1 >> 6) & 1; int x1 = (v1 >> 5) & 1;
+ int x2 = (v2 >> 6) & 1; int x3 = (v2 >> 5) & 1;
+ int x4 = (v3 >> 7) & 1; int x5 = (v3 >> 6) & 1;
+ int x6 = (v3 >> 5) & 1;
+
+ // Now move the high-order xs into the right place.
+ int ohm = 1 << mode;
+ if( ohm & 0x30 ) green |= x0 << 6;
+ if( ohm & 0x3A ) green |= x1 << 5;
+ if( ohm & 0x30 ) blue |= x2 << 6;
+ if( ohm & 0x3A ) blue |= x3 << 5;
+ if( ohm & 0x3D ) scale |= x6 << 5;
+ if( ohm & 0x2D ) scale |= x5 << 6;
+ if( ohm & 0x04 ) scale |= x4 << 7;
+ if( ohm & 0x3B ) red |= x4 << 6;
+ if( ohm & 0x04 ) red |= x3 << 6;
+ if( ohm & 0x10 ) red |= x5 << 7;
+ if( ohm & 0x0F ) red |= x2 << 7;
+ if( ohm & 0x05 ) red |= x1 << 8;
+ if( ohm & 0x0A ) red |= x0 << 8;
+ if( ohm & 0x05 ) red |= x0 << 9;
+ if( ohm & 0x02 ) red |= x6 << 9;
+ if( ohm & 0x01 ) red |= x3 << 10;
+ if( ohm & 0x02 ) red |= x5 << 10;
+
+ // Shift the bits to the top of the 12-bit result.
+ static const int shamts[6] = { 1,1,2,3,4,5 };
+ int shamt = shamts[mode];
+ red <<= shamt; green <<= shamt; blue <<= shamt; scale <<= shamt;
+
+ // Minor components are stored as differences
+ if( mode != 5 ) { green = red - green; blue = red - blue; }
+
+ // Swizzle major component into place
+ if( majcomp == 1 ) swap( red, green );
+ if( majcomp == 2 ) swap( red, blue );
+
+ // Clamp output values, set alpha to 1.0
+ e1.r = clamp( red, 0, 0xFFF );
+ e1.g = clamp( green, 0, 0xFFF );
+ e1.b = clamp( blue, 0, 0xFFF );
+ e1.alpha = 0x780;
+
+ e0.r = clamp( red - scale, 0, 0xFFF );
+ e0.g = clamp( green - scale, 0, 0xFFF );
+ e0.b = clamp( blue - scale, 0, 0xFFF );
+ e0.alpha = 0x780;
+
+ HDR Endpoint Mode 11
+
+ Mode 11 specifies two RGB values, which it calculates from a number of
+ bitfields (a, b0, b1, c, d0 and d1) which are packed together with some
+ mode bits into the six values (v0, v1, v2, v3, v4, v5):
+
+ Value 7 6 5 4 3 2 1 0
+ ----- ------------------------------
+ v0 | a[7:0] |
+ ----- ------------------------------
+ v1 |m0 |a8 | c[5:0] |
+ ----- ------------------------------
+ v2 |m1 |X0 | b0[5:0] |
+ ----- ------------------------------
+ v3 |m2 |X1 | b1[5:0] |
+ ----- ------------------------------
+ v4 |mj0|X2 |X4 | d0[4:0] |
+ ----- ------------------------------
+ v5 |mj1|X3 |X5 | d1[4:0] |
+ ----- ------------------------------
+ Table C.2.21 - HDR Mode 11 Value Layout
+
+ If the major component bits mj[1:0] are both 1, then the RGB values
+ are specified directly:
+
+ Value 7 6 5 4 3 2 1 0
+ ----- ------------------------------
+ v0 | R0[11:4] |
+ ----- ------------------------------
+ v1 | R1[11:4] |
+ ----- ------------------------------
+ v2 | G0[11:4] |
+ ----- ------------------------------
+ v3 | G1[11:4] |
+ ----- ------------------------------
+ v4 | 1 | B0[11:5] |
+ ----- ------------------------------
+ v5 | 1 | B1[11:5] |
+ ----- ------------------------------
+ Table C.2.22 - HDR Mode 11 Value Layout
+
+ The mode bits m[2:0] specify the bit allocation for the different
+ values, and the destinations of the extra bits X0 to X5:
+
+ -------------------------------------------------------------------------
+ Number of bits Destination of extra bits
+ Mode a b c d X0 X1 X2 X3 X4 X5
+ -------------------------------------------------------------------------
+ 0 9 7 6 7 b0[6] b1[6] d0[6] d1[6] d0[5] d1[5]
+ 1 9 8 6 6 b0[6] b1[6] b0[7] b1[7] d0[5] d1[5]
+ 2 10 6 7 7 a[9] c[6] d0[6] d1[6] d0[5] d1[5]
+ 3 10 7 7 6 b0[6] b1[6] a[9] c[6] d0[5] d1[5]
+ 4 11 8 6 5 b0[6] b1[6] b0[7] b1[7] a[9] a[10]
+ 5 11 6 7 6 a[9] a[10] c[7] c[6] d0[5] d1[5]
+ 6 12 7 7 5 b0[6] b1[6] a[11] c[6] a[9] a[10]
+ 7 12 6 7 6 a[9] a[10] a[11] c[6] d0[5] d1[5]
+ -------------------------------------------------------------------------
+ Table C.2.23 - Endpoint Bit Mode
+
+ The complete decoding procedure is as follows:
+
+ // Find major component
+ int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
+
+ // Deal with simple case first
+ if( majcomp == 3 )
+ {
+ e0 = (v0 << 4, v2 << 4, (v4 & 0x7f) << 5, 0x780);
+ e1 = (v1 << 4, v3 << 4, (v5 & 0x7f) << 5, 0x780);
+ return;
+ }
+
+ // Decode mode, parameters.
+ int mode = ((v1&0x80)>>7) | ((v2&0x80)>>6) | ((v3&0x80)>>5);
+ int va = v0 | ((v1 & 0x40) << 2);
+ int vb0 = v2 & 0x3f;
+ int vb1 = v3 & 0x3f;
+ int vc = v1 & 0x3f;
+ int vd0 = v4 & 0x7f;
+ int vd1 = v5 & 0x7f;
+
+ // Assign top bits of vd0, vd1.
+ static const int dbitstab[8] = {7,6,7,6,5,6,5,6};
+ vd0 = signextend( vd0, dbitstab[mode] );
+ vd1 = signextend( vd1, dbitstab[mode] );
+
+ // Extract and place extra bits
+ int x0 = (v2 >> 6) & 1;
+ int x1 = (v3 >> 6) & 1;
+ int x2 = (v4 >> 6) & 1;
+ int x3 = (v5 >> 6) & 1;
+ int x4 = (v4 >> 5) & 1;
+ int x5 = (v5 >> 5) & 1;
+
+ int ohm = 1 << mode;
+ if( ohm & 0xA4 ) va |= x0 << 9;
+ if( ohm & 0x08 ) va |= x2 << 9;
+ if( ohm & 0x50 ) va |= x4 << 9;
+ if( ohm & 0x50 ) va |= x5 << 10;
+ if( ohm & 0xA0 ) va |= x1 << 10;
+ if( ohm & 0xC0 ) va |= x2 << 11;
+ if( ohm & 0x04 ) vc |= x1 << 6;
+ if( ohm & 0xE8 ) vc |= x3 << 6;
+ if( ohm & 0x20 ) vc |= x2 << 7;
+ if( ohm & 0x5B ) vb0 |= x0 << 6;
+ if( ohm & 0x5B ) vb1 |= x1 << 6;
+ if( ohm & 0x12 ) vb0 |= x2 << 7;
+ if( ohm & 0x12 ) vb1 |= x3 << 7;
+
+ // Now shift up so that major component is at top of 12-bit value
+ int shamt = (mode >> 1) ^ 3;
+ va <<= shamt; vb0 <<= shamt; vb1 <<= shamt;
+ vc <<= shamt; vd0 <<= shamt; vd1 <<= shamt;
+
+ e1.r = clamp( va, 0, 0xFFF );
+ e1.g = clamp( va - vb0, 0, 0xFFF );
+ e1.b = clamp( va - vb1, 0, 0xFFF );
+ e1.alpha = 0x780;
+
+ e0.r = clamp( va - vc, 0, 0xFFF );
+ e0.g = clamp( va - vb0 - vc - vd0, 0, 0xFFF );
+ e0.b = clamp( va - vb1 - vc - vd1, 0, 0xFFF );
+ e0.alpha = 0x780;
+
+ if( majcomp == 1 ) { swap( e0.r, e0.g ); swap( e1.r, e1.g ); }
+ else if( majcomp == 2 ) { swap( e0.r, e0.b ); swap( e1.r, e1.b ); }
+
+ HDR Endpoint Mode 14
+
+ Mode 14 specifies two RGBA values, using the eight values (v0, v1, v2,
+ v3, v4, v5, v6, v7). First, the RGB values are decoded from (v0..v5)
+ using the method from Mode 11, then the alpha values are filled in
+ from v6 and v7:
+
+ // Decode RGB as for mode 11
+ (e0,e1) = decode_mode_11(v0,v1,v2,v3,v4,v5)
+
+ // Now fill in the alphas
+ e0.alpha = v6;
+ e1.alpha = v7;
+
+ Note that in this mode, the alpha values are interpreted (and
+ interpolated) as 8-bit unsigned normalized values, as in the LDR modes.
+ This is the only mode that exhibits this behaviour.
+
+ HDR Endpoint Mode 15
+
+ Mode 15 specifies two RGBA values, using the eight values (v0, v1, v2,
+ v3, v4, v5, v6, v7). First, the RGB values are decoded from (v0..v5)
+ using the method from Mode 11. The alpha values are stored in values
+ v6 and v7 as a mode and two values which are interpreted according
+ to the mode:
+
+ Value 7 6 5 4 3 2 1 0
+ ----- ------------------------------
+ v6 |M0 | A[6:0] |
+ ----- ------------------------------
+ v7 |M1 | B[6:0] |
+ ----- ------------------------------
+ Table C.2.24 - HDR Mode 15 Alpha Value Layout
+
+ The alpha values are decoded from v6 and v7 as follows:
+
+ // Decode RGB as for mode 11
+ (e0,e1) = decode_mode_11(v0,v1,v2,v3,v4,v5)
+
+ // Extract mode bits
+ mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
+ v6 &= 0x7F;
+ v7 &= 0x7F;
+
+ if(mode==3)
+ {
+ // Directly specify alphas
+ e0.alpha = v6 << 5;
+ e1.alpha = v7 << 5;
+ }
+ else
+ {
+ // Transfer bits from v7 to v6 and sign extend v7.
+ v6 |= (v7 << (mode+1)) & 0x780;
+ v7 &= (0x3F >> mode);
+ v7 ^= 0x20 >> mode;
+ v7 -= 0x20 >> mode;
+ v6 <<= (4-mode);
+ v7 <<= (4-mode);
+
+ // Add delta and clamp
+ v7 += v6;
+ v7 = clamp(v7, 0, 0xFFF);
+ e0.alpha = v6;
+ e1.alpha = v7;
+ }
+
+ Note that in this mode, the alpha values are interpreted (and
+ interpolated) as 12-bit HDR values, and are interpolated as
+ for any other HDR component.
+
+ C.2.16 Weight Decoding
+ -----------------------
+ The weight information is stored as a stream of bits, growing downwards
+ from the most significant bit in the block. Bit n in the stream is thus
+ bit 127-n in the block.
+
+ For each location in the weight grid, a value (in the specified range)
+ is packed into the stream. These are ordered in a raster pattern
+ starting from location (0,0,0), with the X dimension increasing fastest,
+ and the Z dimension increasing slowest. If dual-plane mode is selected,
+ both weights are emitted together for each location, plane 0 first,
+ then plane 1.
+
+ C.2.17 Weight Unquantization
+ -----------------------------
+
+ Each weight plane is specified as a sequence of integers in a given
+ range. These values are packed using integer sequence encoding.
+
+ Once unpacked, the values must be unquantized from their storage
+ range, returning them to a standard range of 0..64. The procedure
+ for doing so is similar to the color endpoint unquantization.
+
+ First, we unquantize the actual stored weight values to the range 0..63.
+
+ For bit-only representations, this is simple bit replication from the
+ most significant bit of the value.
+
+ For trit or quint-based representations, this involves a set of bit
+ manipulations and adjustments to avoid the expense of full-width
+ multipliers.
+
+ For representations with no additional bits, the results are as follows:
+
+ Range 0 1 2 3 4
+ --------------------------
+ 0..2 0 32 63 - -
+ 0..4 0 16 32 47 63
+ --------------------------
+ Table C.2.25 - Weight Unquantization Values
+
+ For other values, we calculate the initial inputs to a bit manipulation
+ procedure. These are denoted A (7 bits), B (7 bits), C (7 bits), and
+ D (3 bits) and are decoded using the range as follows:
+
+ Range T Q B Bits A B C D
+ -------------------------------------------------------
+ 0..5 1 1 a aaaaaaa 0000000 50 Trit value
+ 0..9 1 1 a aaaaaaa 0000000 28 Quint value
+ 0..11 1 2 ba aaaaaaa b000b0b 23 Trit value
+ 0..19 1 2 ba aaaaaaa b0000b0 13 Quint value
+ 0..23 1 3 cba aaaaaaa cb000cb 11 Trit value
+ -------------------------------------------------------
+ Table C.2.26 - Weight Unquantization Parameters
+
+ These are then processed as follows:
+
+ T = D * C + B;
+ T = T ^ A;
+ T = (A & 0x20) | (T >> 2);
+
+ Note that the multiply in the first line is nearly trivial as it only
+ needs to multiply by 0, 1, 2, 3 or 4.
+
+ As a final step, for all types of value, the range is expanded from
+ 0..63 up to 0..64 as follows:
+
+ if (T > 32) { T += 1; }
+
+ This allows the implementation to use 64 as a divisor during inter-
+ polation, which is much easier than using 63.
+
+ C.2.18 Weight Infill
+ ---------------------
+
+ After unquantization, the weights are subject to weight selection and
+ infill. The infill method is used to calculate the weight for a texel
+ position, based on the weights in the stored weight grid array (which
+ may be a different size).
+
+ The procedure below must be followed exactly, to ensure bit exact
+ results.
+
+ The block size is specified as two dimensions along the s and t
+ axes (Bs, Bt). Texel coordinates within the block (s,t) can have values
+ from 0 to one less than the block dimension in that axis.
+
+ For each block dimension, we compute scale factors (Ds, Dt)
+
+ Ds = floor( (1024 + floor(Bs/2)) / (Bs-1) );
+ Dt = floor( (1024 + floor(Bt/2)) / (Bt-1) );
+
+ Since the block dimensions are constrained, these are easily looked up
+ in a table. These scale factors are then used to scale the (s,t)
+ coordinates to a homogeneous coordinate (cs, ct):
+
+ cs = Ds * s;
+ ct = Dt * t;
+
+ This homogeneous coordinate (cs, ct) is then scaled again to give
+ a coordinate (gs, gt) in the weight-grid space. The weight-grid is
+ of size (N, M), as specified in the block mode field:
+
+ gs = (cs*(N-1)+32) >> 6;
+ gt = (ct*(M-1)+32) >> 6;
+
+ The resulting coordinates may be in the range 0..176. These are inter-
+ preted as 4:4 unsigned fixed point numbers in the range 0.0 .. 11.0.
+
+ If we label the integral parts of these (js, jt) and the fractional
+ parts (fs, ft), then:
+
+ js = gs >> 4; fs = gs & 0x0F;
+ jt = gt >> 4; ft = gt & 0x0F;
+
+ These values are then used to bilinearly interpolate between the stored
+ weights.
+
+ v0 = js + jt*N;
+ p00 = decode_weight(v0);
+ p01 = decode_weight(v0 + 1);
+ p10 = decode_weight(v0 + N);
+ p11 = decode_weight(v0 + N + 1);
+
+ The function decode_weight(n) decodes the nth weight in the stored weight
+ stream. The values p00 to p11 are the weights at the corner of the square
+ in which the texel position resides. These are then weighted using the
+ fractional position to produce the effective weight i as follows:
+
+ w11 = (fs*ft+8) >> 4;
+ w10 = ft - w11;
+ w01 = fs - w11;
+ w00 = 16 - fs - ft + w11;
+ i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
+
+ C.2.19 Weight Application
+ --------------------------
+ Once the effective weight i for the texel has been calculated, the color
+ endpoints are interpolated and expanded.
+
+ For LDR endpoint modes, each color component C is calculated from the
+ corresponding 8-bit endpoint components C0 and C1 as follows:
+
+ If sRGB conversion is not enabled, or for the alpha channel in any case,
+ C0 and C1 are first expanded to 16 bits by bit replication:
+
+ C0 = (C0 << 8) | C0; C1 = (C1 << 8) | C1;
+
+ If sRGB conversion is enabled, C0 and C1 for the R, G, and B channels
+ are expanded to 16 bits differently, as follows:
+
+ C0 = (C0 << 8) | 0x80; C1 = (C1 << 8) | 0x80;
+
+ C0 and C1 are then interpolated to produce a UNORM16 result C:
+
+ C = floor( (C0*(64-i) + C1*i + 32)/64 )
+
+ If sRGB conversion is enabled, the top 8 bits of the interpolation
+ result for the R, G and B channels are passed to the external sRGB
+ conversion block. Otherwise, if C = 65535, then the final result is
+ 1.0 (0x3C00) otherwise C is divided by 65536 and the infinite-precision
+ result of the division is converted to FP16 with round-to-zero
+ semantics.
+
+ For HDR endpoint modes, color values are represented in a 12-bit
+ pseudo-logarithmic representation, and interpolation occurs in a
+ piecewise-approximate logarithmic manner as follows:
+
+ In LDR mode, the error result is returned.
+
+ In HDR mode, the color components from each endpoint, C0 and C1, are
+ initially shifted left 4 bits to become 16-bit integer values and these
+ are interpolated in the same way as LDR. The 16-bit value C is then
+ decomposed into the top five bits, E, and the bottom 11 bits M, which
+ are then processed and recombined with E to form the final value Cf:
+
+ C = floor( (C0*(64-i) + C1*i + 32)/64 )
+ E = (C&0xF800) >> 11; M = C&0x7FF;
+ if (M < 512) { Mt = 3*M; }
+ else if (M >= 1536) { Mt = 5*M - 2048; }
+ else { Mt = 4*M - 512; }
+ Cf = (E<<10) + (Mt>>3)
+
+ This interpolation is a considerably closer approximation to a
+ logarithmic space than simple 16-bit interpolation.
+
+ This final value Cf is interpreted as an IEEE FP16 value. If the result
+ is +Inf or NaN, it is converted to the bit pattern 0x7BFF, which is the
+ largest representable finite value.
+
+ C.2.20 Dual-Plane Decoding
+ ---------------------------
+ If dual-plane mode is disabled, all of the endpoint components are inter-
+ polated using the same weight value.
+
+ If dual-plane mode is enabled, two weights are stored with each texel.
+ One component is then selected to use the second weight for interpolation,
+ instead of the first weight. The first weight is then used for all other
+ components.
+
+ The component to treat specially is indicated using the 2-bit Color
+ Component Selector (CCS) field as follows:
+
+ Value Weight 0 Weight 1
+ --------------------------
+ 0 GBA R
+ 1 RBA G
+ 2 RGA B
+ 3 RGB A
+ --------------------------
+ Table C.2.28 - Dual Plane Color Component Selector Values
+
+ The CCS bits are stored at a variable position directly below the weight
+ bits and any additional CEM bits.
+
+ C.2.21 Partition Pattern Generation
+ ------------------------------------
+
+ When multiple partitions are active, each texel position is assigned a
+ partition index. This partition index is calculated using a seed (the
+ partition pattern index), the texel's x,y,z position within the block,
+ and the number of partitions. An additional argument, small_block, is
+ set to 1 if the number of texels in the block is less than 31,
+ otherwise it is set to 0.
+
+ This function is specified in terms of x, y and z in order to support
+ 3D textures. For 2D textures and texture slices, z will always be 0.
+
+ The full partition selection algorithm is as follows:
+
+ int select_partition(int seed, int x, int y, int z,
+ int partitioncount, int small_block)
+ {
+ if( small_block ){ x <<= 1; y <<= 1; z <<= 1; }
+ seed += (partitioncount-1) * 1024;
+ uint32_t rnum = hash52(seed);
+ uint8_t seed1 = rnum & 0xF;
+ uint8_t seed2 = (rnum >> 4) & 0xF;
+ uint8_t seed3 = (rnum >> 8) & 0xF;
+ uint8_t seed4 = (rnum >> 12) & 0xF;
+ uint8_t seed5 = (rnum >> 16) & 0xF;
+ uint8_t seed6 = (rnum >> 20) & 0xF;
+ uint8_t seed7 = (rnum >> 24) & 0xF;
+ uint8_t seed8 = (rnum >> 28) & 0xF;
+ uint8_t seed9 = (rnum >> 18) & 0xF;
+ uint8_t seed10 = (rnum >> 22) & 0xF;
+ uint8_t seed11 = (rnum >> 26) & 0xF;
+ uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
+
+ seed1 *= seed1; seed2 *= seed2;
+ seed3 *= seed3; seed4 *= seed4;
+ seed5 *= seed5; seed6 *= seed6;
+ seed7 *= seed7; seed8 *= seed8;
+ seed9 *= seed9; seed10 *= seed10;
+ seed11 *= seed11; seed12 *= seed12;
+
+ int sh1, sh2, sh3;
+ if( seed & 1 )
+ { sh1 = (seed&2 ? 4:5); sh2 = (partitioncount==3 ? 6:5); }
+ else
+ { sh1 = (partitioncount==3 ? 6:5); sh2 = (seed&2 ? 4:5); }
+ sh3 = (seed & 0x10) ? sh1 : sh2;
+
+ seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
+ seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
+ seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
+
+ int a = seed1*x + seed2*y + seed11*z + (rnum >> 14);
+ int b = seed3*x + seed4*y + seed12*z + (rnum >> 10);
+ int c = seed5*x + seed6*y + seed9 *z + (rnum >> 6);
+ int d = seed7*x + seed8*y + seed10*z + (rnum >> 2);
+
+ a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;
+
+ if( partitioncount < 4 ) d = 0;
+ if( partitioncount < 3 ) c = 0;
+
+ if( a >= b && a >= c && a >= d ) return 0;
+ else if( b >= c && b >= d ) return 1;
+ else if( c >= d ) return 2;
+ else return 3;
+ }
+
+ As has been observed before, the bit selections are much easier to
+ express in hardware than in C.
+
+ The seed is expanded using a hash function hash52, which is defined as
+ follows:
+
+ uint32_t hash52( uint32_t p )
+ {
+ p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;
+ p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;
+ p ^= p << 6; p ^= p >> 17;
+ return p;
+ }
+
+ This assumes that all operations act on 32-bit values.
+
+ C.2.22 Data Size Determination
+ -------------------------------
+
+ The size of the data used to represent color endpoints is not
+ explicitly specified. Instead, it is determined from the block mode and
+ number of partitions as follows:
+
+ config_bits = 17;
+ if(num_partitions>1)
+ if(single_CEM)
+ config_bits = 29;
+ else
+ config_bits = 25 + 3*num_partitions;
+
+ num_weights = M * N * Q; // size of weight grid
+
+ if(dual_plane)
+ {
+ config_bits += 2;
+ num_weights *= 2;
+ }
+
+ weight_bits = ceil(num_weights*8*trits_in_weight_range/5) +
+ ceil(num_weights*7*quints_in_weight_range/3) +
+ num_weights*bits_in_weight_range;
+
+ remaining_bits = 128 - config_bits - weight_bits;
+
+ num_CEM_pairs = base_CEM_class+1 + count_bits(extra_CEM_bits);
+
+ The CEM value range is then looked up from a table indexed by remaining
+ bits and num_CEM_pairs. This table is initialized such that the range
+ is as large as possible, consistent with the constraint that the number
+ of bits required to encode num_CEM_pairs pairs of values is not more
+ than the number of remaining bits.
+
+ An equivalent iterative algorithm would be:
+
+ num_CEM_values = num_CEM_pairs*2;
+
+ for(range = each possible CEM range in descending order of size)
+ {
+ CEM_bits = ceil(num_CEM_values*8*trits_in_CEM_range/5) +
+ ceil(num_CEM_values*7*quints_in_CEM_range/3) +
+ num_CEM_values*bits_in_CEM_range;
+
+ if(CEM_bits <= remaining_bits)
+ break;
+ }
+ return range;
+
+ In cases where this procedure results in unallocated bits, these bits
+ are not read by the decoding process and can have any value.
+
+ C.2.23 Void-Extent Blocks
+ --------------------------
+
+ A void-extent block is a block encoded with a single color. It also
+ specifies some additional information about the extent of the single-
+ color area beyond this block, which can optionally be used by a
+ decoder to reduce or prevent redundant block fetches.
+
+ The layout of a 2D Void-Extent block is as follows:
+
+ 127 126 125 124 123 122 121 120 119 118 117 116 115 114 113 112
+ ---------------------------------------------------------------
+ | Block color A component |
+ ---------------------------------------------------------------
+
+ 111 110 109 108 107 106 105 104 103 102 101 100 99 98 97 96
+ ----------------------------------------------------------------
+ | Block color B component |
+ ----------------------------------------------------------------
+
+ 95 94 93 92 91 90 89 88 87 86 85 84 83 82 81 80
+ ----------------------------------------------------------------
+ | Block color G component |
+ ----------------------------------------------------------------
+ 79 78 77 76 75 74 73 72 71 70 69 68 67 66 65 64
+ ----------------------------------------------------------------
+ | Block color R component |
+ ----------------------------------------------------------------
+
+ 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
+ ----------------------------------------------------------------
+ | Void-extent maximum T coordinate | Min T |
+ ----------------------------------------------------------------
+
+ 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
+ ----------------------------------------------------------------
+ Void-extent minimum T coordinate | Void-extent max S |
+ ----------------------------------------------------------------
+
+ 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+ ----------------------------------------------------------------
+ Void-extent max S coord | Void-extent minimum S coordinate |
+ ----------------------------------------------------------------
+ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ ----------------------------------------------------------------
+ Min S coord | 1 | 1 | D | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 |
+ ----------------------------------------------------------------
+ -------------------------------------------------
+ Figure C.7 - 2D Void-Extent Block Layout Overview
+
+ Bit 9 is the Dynamic Range flag, which indicates the format in which
+ colors are stored. A 0 value indicates LDR, in which case the color
+ components are stored as UNORM16 values. A 1 indicates HDR, in which
+ case the color components are stored as FP16 values.
+
+ The reason for the storage of UNORM16 values in the LDR case is due
+ to the possibility that the value will need to be passed on to sRGB
+ conversion. By storing the color value in the format which comes out
+ of the interpolator, before the conversion to FP16, we avoid having
+ to have separate versions for sRGB and linear modes.
+
+ If a void-extent block with HDR values is decoded in LDR mode, then
+ the result will be the error color, opaque magenta, for all texels
+ within the block.
+
+ In the HDR case, if the color component values are infinity or NaN, this
+ will result in undefined behavior. As usual, this must not lead to GL
+ interruption or termination.
+
+ Bits 10 and 11 are reserved and must be 1.
+
+ The minimum and maximum coordinate values are treated as unsigned
+ integers and then normalized into the range 0..1 (by dividing by 2^13-1
+ or 2^9-1, for 2D and 3D respectively). The maximum values for each
+ dimension must be greater than the corresponding minimum values,
+ unless they are all all-1s.
+
+ If all the coordinates are all-1s, then the void extent is ignored,
+ and the block is simply a constant-color block.
+
+ The existence of single-color blocks with void extents must not produce
+ results different from those obtained if these single-color blocks are
+ defined without void-extents. Any situation in which the results would
+ differ is invalid. Results from invalid void extents are undefined.
+
+ If a void-extent appears in a MIPmap level other than the most detailed
+ one, then the extent will apply to all of the more detailed levels too.
+ This allows decoders to avoid sampling more detailed MIPmaps.
+
+ If the more detailed MIPmap level is not a constant color in this region,
+ then the block may be marked as constant color, but without a void extent,
+ as detailed above.
+
+ If a void-extent extends to the edge of a texture, then filtered texture
+ colors may not be the same color as that specified in the block, due to
+ texture border colors, wrapping, or cube face wrapping.
+
+ Care must be taken when updating or extracting partial image data that
+ void-extents in the image do not become invalid.
+
+ C.2.24 Illegal Encodings
+ -------------------------
+
+ In ASTC, there is a variety of ways to encode an illegal block. Decoders
+ are required to recognize all illegal blocks and emit the standard error
+ color value upon encountering an illegal block.
+
+ Here is a comprehensive list of situations that represent illegal block
+ encodings:
+
+ * The block mode specified is one of the modes explicitly listed
+ as Reserved.
+ * A 2D void-extent block that has any of the reserved bits not
+ set to 1.
+ * A block mode has been specified that would require more than
+ 64 weights total.
+ * A block mode has been specified that would require more than
+ 96 bits for integer sequence encoding of the weight grid.
+ * A block mode has been specified that would require fewer than
+ 24 bits for integer sequence encoding of the weight grid.
+ * The size of the weight grid exceeds the size of the block footprint
+ in any dimension.
+ * Color endpoint modes have been specified such that the color
+ integer sequence encoding would require more than 18 integers.
+ * The number of bits available for color endpoint encoding after all
+ the other fields have been counted is less than ceil(13C/5) where C
+ is the number of color endpoint integers (this would restrict color
+ integers to a range smaller than 0..5, which is not supported).
+ * Dual weight mode is enabled for a block with 4 partitions.
+ * Void-Extent blocks where the low coordinate for some texture axis
+ is greater than or equal to the high coordinate.
+
+ Note also that, in LDR mode, a block which has both HDR and LDR endpoint
+ modes assigned to different partitions is not an error block. Only those
+ texels which belong to the HDR partition will result in the error color.
+ Texels belonging to a LDR partition will be decoded as normal.
+
+ C.2.25 LDR PROFILE SUPPORT
+ ---------------------------
+
+ Implementations of the LDR Profile must satisfy the following requirements:
+
+ * All textures with valid encodings for LDR Profile must decode
+ identically using either a LDR Profile, HDR Profile, or Full Profile
+ decoder.
+ * All features included only in the HDR Profile or Full Profile must be
+ treated as reserved in the LDR Profile, and return the error color on
+ decoding.
+ * Any sequence of API calls valid for the LDR Profile must also be valid
+ for the HDR Profile or Full Profile and return identical results when
+ given a texture encoded for the LDR Profile.
+
+ The feature subset for the LDR profile is:
+
+ * 2D textures only, including 2D, 2D array, cube map face,
+ and cube map array texture targets.
+ * Only those block sizes listed in Table C.2.2 are supported.
+ * LDR operation mode only.
+ * Only LDR endpoint formats must be supported, namely formats
+ 0, 1, 4, 5, 6, 8, 9, 10, 12, 13.
+ * Decoding from a HDR endpoint results in the error color.
+ * Interpolation returns UNORM8 results when used in conjunction
+ with sRGB.
+ * LDR void extent blocks must be supported, but void extents
+ may not be checked."
+
+ If only the LDR profile is supported, read this extension by striking
+ all descriptions of HDR modes and decoding algorithms. The extension
+ documents how to modify the document for some particularly tricky cases,
+ but the general rule is as described in this paragraph.
+
+Interactions with immutable-format texture images
+
+ ASTC texture formats are supported by immutable-format textures only if
+ such textures are supported by the underlying implementation (e.g.
+ OpenGL 4.1 or later, OpenGL ES 3.0 or later, or earlier versions
+ supporting the GL_EXT_texture_storage extension). Otherwise, remove all
+ references to the Tex*Storage* commands from this specification.
+
+Interactions with texture cube map arrays
+
+ ASTC textures are supported for the TEXTURE_CUBE_MAP_ARRAY target only
+ when cube map arrays are supported by the underlying implementation
+ (e.g. OpenGL 4.0 or later, or an OpenGL or OpenGL ES version supporting
+ an extension defining cube map arrays). Otherwise, remove all references
+ to texture cube map arrays from this specification.
+
+Interactions with OpenGL (all versions)
+
+ ASTC is not supported for 1D textures and texture rectangles, and does
+ not support non-zero borders.
+
+ Add the following error conditions to CompressedTexImage*D:
+
+ "An INVALID_ENUM error is generated by CompressedTexImage1D if
+ <internalformat> is one of the ASTC formats.
+
+ An INVALID_OPERATION error is generated by CompressedTexImage2D
+ and CompressedTexImage3D if <internalformat> is one of the ASTC
+ formats and <border> is non-zero."
+
+ Add the following error conditions to CompressedTexSubImage*D:
+
+ "An INVALID_ENUM error is generated by CompressedTex*SubImage1D
+ if the internal format of the texture is one of the ASTC formats.
+
+ An INVALID_OPERATION error is generated by CompressedTex*SubImage2D
+ if the internal format of the texture is one of the ASTC formats
+ and <border> is non-zero."
+
+ Add the following error conditions to TexStorage1D and TextureStorage1D:
+
+ "An INVALID_ENUM error is generated by TexStorage1D and TextureStorage1D
+ if <internalformat> is one of the ASTC formats."
+
+ Add the following error conditions to TexStorage2D and TextureStorage2D
+ for versions of OpenGL that support texture rectangles:
+
+ "An INVALID_OPERATION error is generated by TexStorage2D and
+ TextureStorage2D if