diff --git a/.gitignore b/.gitignore index b79ce264c8..3284865d6c 100644 --- a/.gitignore +++ b/.gitignore @@ -91,3 +91,6 @@ tests/.hypothesis zarr/version.py zarr.egg-info/ + +# zarr-metadata package lockfile (a library, not an app) +packages/zarr-metadata/uv.lock diff --git a/packages/zarr-metadata/README.md b/packages/zarr-metadata/README.md new file mode 100644 index 0000000000..2e4b44ae1a --- /dev/null +++ b/packages/zarr-metadata/README.md @@ -0,0 +1,15 @@ +# zarr-metadata + +Spec-defined metadata types for Zarr v2 and v3, distributed as pure-typing +artifacts (TypedDicts, type aliases, unions). No runtime logic, no numpy, +no storage backends. + +`zarr-metadata` is developed in the [zarr-python](https://github.com/zarr-developers/zarr-python) +repository at `packages/zarr-metadata/`. + +## Principle + +Every type that models a spec artifact (v2 or v3 array/group/consolidated +metadata, chunk grids, codec metadata, dtype shapes) belongs in +`zarr-metadata`. Zarr-python implementation details (runtime codecs, +config dataclasses, numcodecs-derived helpers) stay in `zarr`. diff --git a/packages/zarr-metadata/pyproject.toml b/packages/zarr-metadata/pyproject.toml new file mode 100644 index 0000000000..40dcdc9c17 --- /dev/null +++ b/packages/zarr-metadata/pyproject.toml @@ -0,0 +1,51 @@ +[build-system] +requires = ["hatchling>=1.29.0"] +build-backend = "hatchling.build" + +[project] +name = "zarr-metadata" +version = "0.1.0" +description = "Spec-defined metadata types for Zarr v2 and v3." 
+readme = "README.md" +requires-python = ">=3.11" +license = "MIT" +authors = [ + { name = "Davis Bennett", email = "davis.v.bennett@gmail.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Typing :: Typed", +] +dependencies = [ + "typing_extensions>=4.13", +] + +[project.optional-dependencies] +test = ["pytest"] + +[tool.hatch.build.targets.wheel] +packages = ["src/zarr_metadata"] + +[tool.numpydoc_validation] +checks = [ + "GL10", + "SS04", + "PR02", + "PR03", + "PR05", + "PR06", +] + +[tool.pyright] +include = ["src"] +enableExperimentalFeatures = true +typeCheckingMode = "strict" +pythonVersion = "3.11" diff --git a/packages/zarr-metadata/src/zarr_metadata/__init__.py b/packages/zarr-metadata/src/zarr_metadata/__init__.py new file mode 100644 index 0000000000..a7d39bec1a --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/__init__.py @@ -0,0 +1,24 @@ +from zarr_metadata.common import JSON, NamedConfig, NamedRequiredConfig +from zarr_metadata.v2.array import ArrayMetadataV2 +from zarr_metadata.v2.group import GroupMetadataV2 +from zarr_metadata.v3.array import ArrayMetadataV3 +from zarr_metadata.v3.group import GroupMetadataV3 + +ArrayMetadata = ArrayMetadataV2 | ArrayMetadataV3 +"""Any Zarr array metadata document (v2 or v3).""" + +GroupMetadata = GroupMetadataV2 | GroupMetadataV3 +"""Any Zarr group metadata document (v2 or v3).""" + + +__all__ = [ + "JSON", + "ArrayMetadata", + "ArrayMetadataV2", + "ArrayMetadataV3", + "GroupMetadata", + "GroupMetadataV2", + "GroupMetadataV3", + "NamedConfig", + "NamedRequiredConfig", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/common.py 
b/packages/zarr-metadata/src/zarr_metadata/common.py new file mode 100644 index 0000000000..03e0394222 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/common.py @@ -0,0 +1,47 @@ +""" +Top-level cross-version primitives for Zarr metadata. + +Version-specific types live under `zarr_metadata.v2` and `zarr_metadata.v3`. +Codec and dtype spec types live under `zarr_metadata.v3.codec` and +`zarr_metadata.v3.data_type`. +""" + +from collections.abc import Mapping, Sequence +from typing import Generic, NotRequired, TypedDict, TypeVar + +from typing_extensions import ReadOnly + +JSON = str | int | float | bool | Mapping[str, "JSON"] | Sequence["JSON"] | None +"""Any valid JSON value.""" + + +TName = TypeVar("TName", bound=str) +TConfig = TypeVar("TConfig", bound=Mapping[str, object]) + + +class NamedConfig(TypedDict, Generic[TName, TConfig]): # noqa: UP046 + """ + Named-config envelope with optional configuration. + + Generic with two parameters: name literal and configuration mapping. + + Uses the PEP 484 ``Generic[T]`` form rather than PEP 695 ``[T]`` syntax + so the package supports Python 3.11. + """ + + name: ReadOnly[TName] + configuration: NotRequired[ReadOnly[TConfig]] + + +class NamedRequiredConfig(TypedDict, Generic[TName, TConfig]): # noqa: UP046 + """ + Named-config envelope with required configuration. + + Generic with two parameters: name literal and configuration mapping. + + Uses the PEP 484 ``Generic[T]`` form rather than PEP 695 ``[T]`` syntax + so the package supports Python 3.11. 
+ """ + + name: ReadOnly[TName] + configuration: ReadOnly[TConfig] diff --git a/packages/zarr-metadata/src/zarr_metadata/py.typed b/packages/zarr-metadata/src/zarr_metadata/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/packages/zarr-metadata/src/zarr_metadata/v2/__init__.py b/packages/zarr-metadata/src/zarr_metadata/v2/__init__.py new file mode 100644 index 0000000000..ea5d08af7b --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v2/__init__.py @@ -0,0 +1,15 @@ +"""Zarr v2 metadata types.""" + +from zarr_metadata.v2.array import ArrayMetadataV2, DataTypeV2, DataTypeV2Structured +from zarr_metadata.v2.codec import NumcodecsConfig +from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2 +from zarr_metadata.v2.group import GroupMetadataV2 + +__all__ = [ + "ArrayMetadataV2", + "ConsolidatedMetadataV2", + "DataTypeV2", + "DataTypeV2Structured", + "GroupMetadataV2", + "NumcodecsConfig", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v2/array.py b/packages/zarr-metadata/src/zarr_metadata/v2/array.py new file mode 100644 index 0000000000..cbcbccc0e2 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v2/array.py @@ -0,0 +1,59 @@ +"""Zarr v2 array metadata types.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, NotRequired, TypedDict + +if TYPE_CHECKING: + from zarr_metadata.common import JSON + from zarr_metadata.v2.codec import NumcodecsConfig + + +class DataTypeV2Structured(TypedDict): + """ + A single field entry inside a structured v2 dtype. + + Spec-faithful: `datatype` is a numpy-style dtype string; `shape` is + present only when the field is a subarray field. + + See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding + """ + + fieldname: str + datatype: str + shape: NotRequired[tuple[int, ...]] + + +DataTypeV2 = str | tuple[DataTypeV2Structured, ...] +"""The v2 dtype representation. + +Simple dtypes are numpy-style strings (e.g. 
`"kji...` (where `` is `separator`). + +See https://zarr-specs.readthedocs.io/en/latest/v3/core/index.html#chunk-key-encoding +""" + +from typing import Final, Literal, NotRequired, TypedDict + +from zarr_metadata.v3.chunk_key_encoding import ChunkKeySeparator + +DEFAULT_CHUNK_KEY_ENCODING_NAME: Final = "default" +"""The `name` field value of the default chunk key encoding.""" + +DefaultChunkKeyEncodingName = Literal["default"] +"""Literal type of the `name` field of the default chunk key encoding.""" + + +class DefaultChunkKeyEncodingConfiguration(TypedDict): + """Configuration for the default chunk key encoding. + + `separator` is optional and defaults to `"/"` per spec. + """ + + separator: NotRequired[ChunkKeySeparator] + + +class DefaultChunkKeyEncoding(TypedDict): + """Default chunk key encoding metadata.""" + + name: DefaultChunkKeyEncodingName + configuration: NotRequired[DefaultChunkKeyEncodingConfiguration] + + +__all__ = [ + "DEFAULT_CHUNK_KEY_ENCODING_NAME", + "DefaultChunkKeyEncoding", + "DefaultChunkKeyEncodingConfiguration", + "DefaultChunkKeyEncodingName", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py b/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py new file mode 100644 index 0000000000..1864e8c5a2 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py @@ -0,0 +1,42 @@ +""" +V2-compatibility chunk key encoding (Zarr v3 core spec). + +Intended only to allow existing v2 arrays to be converted to v3 without +having to rename chunks. Not recommended for new arrays. 
+ +See https://zarr-specs.readthedocs.io/en/latest/v3/core/index.html#chunk-key-encoding +""" + +from typing import Final, Literal, NotRequired, TypedDict + +from zarr_metadata.v3.chunk_key_encoding import ChunkKeySeparator + +V2_CHUNK_KEY_ENCODING_NAME: Final = "v2" +"""The `name` field value of the v2 chunk key encoding.""" + +V2ChunkKeyEncodingName = Literal["v2"] +"""Literal type of the `name` field of the v2 chunk key encoding.""" + + +class V2ChunkKeyEncodingConfiguration(TypedDict): + """Configuration for the v2 chunk key encoding. + + `separator` is optional and defaults to `"."` per spec. + """ + + separator: NotRequired[ChunkKeySeparator] + + +class V2ChunkKeyEncoding(TypedDict): + """V2-compatibility chunk key encoding metadata.""" + + name: V2ChunkKeyEncodingName + configuration: NotRequired[V2ChunkKeyEncodingConfiguration] + + +__all__ = [ + "V2_CHUNK_KEY_ENCODING_NAME", + "V2ChunkKeyEncoding", + "V2ChunkKeyEncodingConfiguration", + "V2ChunkKeyEncodingName", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/__init__.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/__init__.py new file mode 100644 index 0000000000..97b3a69551 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/__init__.py @@ -0,0 +1,19 @@ +""" +Zarr codec metadata types. +""" + +from collections.abc import Mapping + +Codec = str | Mapping[str, object] +""" +The widest JSON shape that can specify a codec (v2 or v3). + +For v3, a codec is a `{"name": ..., "configuration": ...}` mapping (or +a bare `str` shorthand); for v2, a codec is the numcodecs JSON dict. +The accepted-input shape is the union of both. +""" + + +__all__ = [ + "Codec", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py new file mode 100644 index 0000000000..1cd79f3d43 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py @@ -0,0 +1,66 @@ +""" +Blosc codec types. 
+ +See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/blosc/index.html +""" + +from typing import Final, Literal, TypedDict + +BLOSC_CODEC_NAME: Final = "blosc" +"""The `name` field value of the `blosc` codec.""" + +BloscCodecName = Literal["blosc"] +"""Literal type of the `name` field of the `blosc` codec.""" + +BLOSC_SHUFFLE_NOSHUFFLE: Final = "noshuffle" +BLOSC_SHUFFLE_SHUFFLE: Final = "shuffle" +BLOSC_SHUFFLE_BITSHUFFLE: Final = "bitshuffle" + +BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"] +"""Blosc shuffle mode names.""" + +BLOSC_CNAME_LZ4: Final = "lz4" +BLOSC_CNAME_LZ4HC: Final = "lz4hc" +BLOSC_CNAME_BLOSCLZ: Final = "blosclz" +BLOSC_CNAME_SNAPPY: Final = "snappy" +BLOSC_CNAME_ZLIB: Final = "zlib" +BLOSC_CNAME_ZSTD: Final = "zstd" + +BloscCName = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"] +"""Blosc compressor identifiers.""" + + +class BloscCodecConfiguration(TypedDict): + """Configuration for the Zarr v3 `blosc` codec.""" + + cname: BloscCName + clevel: int + shuffle: BloscShuffle + blocksize: int + typesize: int + + +class BloscCodec(TypedDict): + """`blosc` codec metadata.""" + + name: BloscCodecName + configuration: BloscCodecConfiguration + + +__all__ = [ + "BLOSC_CNAME_BLOSCLZ", + "BLOSC_CNAME_LZ4", + "BLOSC_CNAME_LZ4HC", + "BLOSC_CNAME_SNAPPY", + "BLOSC_CNAME_ZLIB", + "BLOSC_CNAME_ZSTD", + "BLOSC_CODEC_NAME", + "BLOSC_SHUFFLE_BITSHUFFLE", + "BLOSC_SHUFFLE_NOSHUFFLE", + "BLOSC_SHUFFLE_SHUFFLE", + "BloscCName", + "BloscCodec", + "BloscCodecConfiguration", + "BloscCodecName", + "BloscShuffle", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py new file mode 100644 index 0000000000..fe6e7b2579 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py @@ -0,0 +1,47 @@ +""" +Bytes codec types. 
+ +See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/bytes/index.html +""" + +from typing import Final, Literal, NotRequired, TypedDict + +BYTES_CODEC_NAME: Final = "bytes" +"""The `name` field value of the `bytes` codec.""" + +BytesCodecName = Literal["bytes"] +"""Literal type of the `name` field of the `bytes` codec.""" + +BYTES_ENDIAN_LITTLE: Final = "little" +BYTES_ENDIAN_BIG: Final = "big" + +Endian = Literal["little", "big"] +"""Byte order of multi-byte numeric data.""" + + +class BytesCodecConfiguration(TypedDict): + """ + Configuration for the Zarr v3 `bytes` codec. + + The `endian` field is required for multi-byte data types. + """ + + endian: NotRequired[Endian] + + +class BytesCodec(TypedDict): + """`bytes` codec metadata.""" + + name: BytesCodecName + configuration: BytesCodecConfiguration + + +__all__ = [ + "BYTES_CODEC_NAME", + "BYTES_ENDIAN_BIG", + "BYTES_ENDIAN_LITTLE", + "BytesCodec", + "BytesCodecConfiguration", + "BytesCodecName", + "Endian", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/crc32c.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/crc32c.py new file mode 100644 index 0000000000..573e56b356 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/crc32c.py @@ -0,0 +1,36 @@ +""" +CRC32C codec types. + +See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/crc32c/index.html + +The CRC32C codec has no configuration fields, so the `configuration` +key is normally omitted (or, if present, is an empty mapping). +""" + +from typing import Final, Literal, NotRequired, TypedDict + +from zarr_metadata.common import JSON + +CRC32C_CODEC_NAME: Final = "crc32c" +"""The `name` field value of the `crc32c` codec.""" + +Crc32cCodecName = Literal["crc32c"] +"""Literal type of the `name` field of the `crc32c` codec.""" + + +class Crc32cCodec(TypedDict): + """`crc32c` codec metadata. + + Per spec the codec has no configuration fields. `configuration` is + optional and, if present, should be an empty mapping. 
+ """ + + name: Crc32cCodecName + configuration: NotRequired[dict[str, JSON]] + + +__all__ = [ + "CRC32C_CODEC_NAME", + "Crc32cCodec", + "Crc32cCodecName", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py new file mode 100644 index 0000000000..16d9c5c5d1 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py @@ -0,0 +1,40 @@ +""" +Gzip codec types. + +See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/index.html +""" + +from typing import Final, Literal, NotRequired, TypedDict + +GZIP_CODEC_NAME: Final = "gzip" +"""The `name` field value of the `gzip` codec.""" + +GzipCodecName = Literal["gzip"] +"""Literal type of the `name` field of the `gzip` codec.""" + + +class GzipCodecConfiguration(TypedDict): + """ + Configuration for the Zarr v3 `gzip` codec. + + `level` is an integer in the range 0-9; 0 disables compression and 9 + is slowest with the best compression ratio. The spec does not mandate + a default. + """ + + level: NotRequired[int] + + +class GzipCodec(TypedDict): + """`gzip` codec metadata.""" + + name: GzipCodecName + configuration: GzipCodecConfiguration + + +__all__ = [ + "GZIP_CODEC_NAME", + "GzipCodec", + "GzipCodecConfiguration", + "GzipCodecName", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding.py new file mode 100644 index 0000000000..50aa6621eb --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding.py @@ -0,0 +1,61 @@ +""" +Sharding codec types. 
+ +See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/sharding-indexed/index.html +""" + +from typing import Final, Literal, NotRequired, TypedDict + +from zarr_metadata.v3.codec import Codec + +SHARDING_CODEC_NAME: Final = "sharding_indexed" +"""The `name` field value of the `sharding_indexed` codec.""" + +ShardingCodecName = Literal["sharding_indexed"] +"""Literal type of the `name` field of the `sharding_indexed` codec.""" + +SHARDING_INDEX_LOCATION_START: Final = "start" +SHARDING_INDEX_LOCATION_END: Final = "end" + +IndexLocation = Literal["start", "end"] +"""Position of the shard index within the encoded shard.""" + + +class ShardingCodecConfiguration(TypedDict): + """ + Configuration for the Zarr v3 `sharding_indexed` codec. + + `chunk_shape` is the shape of inner chunks along each dimension; + it must evenly divide the shard shape. + + `codecs` is the codec pipeline applied to each inner chunk; exactly + one array-to-bytes codec is required. + + `index_codecs` is the codec pipeline applied to the shard index; + it must be deterministic (no variable-size compression). + + `index_location` defaults to `"end"` per the spec. + """ + + chunk_shape: tuple[int, ...] + codecs: tuple[Codec, ...] + index_codecs: tuple[Codec, ...] + index_location: NotRequired[IndexLocation] + + +class ShardingCodec(TypedDict): + """`sharding_indexed` codec metadata.""" + + name: ShardingCodecName + configuration: ShardingCodecConfiguration + + +__all__ = [ + "SHARDING_CODEC_NAME", + "SHARDING_INDEX_LOCATION_END", + "SHARDING_INDEX_LOCATION_START", + "IndexLocation", + "ShardingCodec", + "ShardingCodecConfiguration", + "ShardingCodecName", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/transpose.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/transpose.py new file mode 100644 index 0000000000..ef792670c5 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/transpose.py @@ -0,0 +1,39 @@ +""" +Transpose codec types. 
+ +See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/transpose/index.html +""" + +from typing import Final, Literal, TypedDict + +TRANSPOSE_CODEC_NAME: Final = "transpose" +"""The `name` field value of the `transpose` codec.""" + +TransposeCodecName = Literal["transpose"] +"""Literal type of the `name` field of the `transpose` codec.""" + + +class TransposeCodecConfiguration(TypedDict): + """ + Configuration for the Zarr v3 `transpose` codec. + + `order` is a permutation of the dimension indices 0..n-1 that + specifies the dimension reordering applied during encoding. + """ + + order: tuple[int, ...] + + +class TransposeCodec(TypedDict): + """`transpose` codec metadata.""" + + name: TransposeCodecName + configuration: TransposeCodecConfiguration + + +__all__ = [ + "TRANSPOSE_CODEC_NAME", + "TransposeCodec", + "TransposeCodecConfiguration", + "TransposeCodecName", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/zstd.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/zstd.py new file mode 100644 index 0000000000..a659ca4cf1 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/zstd.py @@ -0,0 +1,41 @@ +""" +Zstandard codec types. + +See https://github.com/zarr-developers/zarr-specs/pull/256 (unmerged at +time of writing; the configuration shape below reflects the proposed +specification). +""" + +from typing import Final, Literal, TypedDict + +ZSTD_CODEC_NAME: Final = "zstd" +"""The `name` field value of the `zstd` codec.""" + +ZstdCodecName = Literal["zstd"] +"""Literal type of the `name` field of the `zstd` codec.""" + + +class ZstdCodecConfiguration(TypedDict): + """ + Configuration for the Zarr v3 `zstd` codec. + + Both fields are required per the proposed specification. 
+ """ + + level: int + checksum: bool + + +class ZstdCodec(TypedDict): + """`zstd` codec metadata.""" + + name: ZstdCodecName + configuration: ZstdCodecConfiguration + + +__all__ = [ + "ZSTD_CODEC_NAME", + "ZstdCodec", + "ZstdCodecConfiguration", + "ZstdCodecName", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/consolidated.py b/packages/zarr-metadata/src/zarr_metadata/v3/consolidated.py new file mode 100644 index 0000000000..7fe4f0b8c2 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/consolidated.py @@ -0,0 +1,30 @@ +"""Zarr v3 consolidated metadata types.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, TypedDict + +if TYPE_CHECKING: + from collections.abc import Mapping + + from zarr_metadata.v3.array import ArrayMetadataV3 + from zarr_metadata.v3.group import GroupMetadataV3 + + +class ConsolidatedMetadataV3(TypedDict): + """ + Inline consolidated metadata embedded in a v3 group. + + The `metadata` map contains only v3 array and group entries - v2 + entries are excluded by design. Mixing v2 entries into a v3 + consolidated metadata document is invalid per spec. + """ + + kind: Literal["inline"] + must_understand: Literal[False] + metadata: Mapping[str, ArrayMetadataV3 | GroupMetadataV3] + + +__all__ = [ + "ConsolidatedMetadataV3", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/__init__.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/__init__.py new file mode 100644 index 0000000000..c126e18ddb --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/__init__.py @@ -0,0 +1,25 @@ +""" +Zarr v3 data type spec types. 
+ +Each v3 data type has its own submodule: + +- Core primitives: `bool`, `int8`/`16`/`32`/`64`, `uint8`/`16`/`32`/`64`, + `float16`/`32`/`64`, `complex64`/`128`, `raw` (for `r*`) +- zarr-extensions: `bytes`, `string`, `numpy_datetime64`, `numpy_timedelta64`, + `struct` + +See https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html +""" + +from collections.abc import Mapping, Sequence + +# Wider than the top-level JSON because TypedDicts used for data type +# configurations are assignable to Mapping[str, object], not to +# Mapping[str, JSON]. +DType = str | int | float | Sequence["DType"] | None | Mapping[str, object] +"""The widest JSON-like shape that can specify a Zarr v3 data type.""" + + +__all__ = [ + "DType", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/bool.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/bool.py new file mode 100644 index 0000000000..48042b612a --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/bool.py @@ -0,0 +1,23 @@ +""" +Zarr v3 `bool` data type. + +See https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html +""" + +from typing import Final, Literal + +BOOL_DTYPE_NAME: Final = "bool" +"""The `data_type` value for the `bool` type.""" + +BoolDTypeName = Literal["bool"] +"""Literal type of the `data_type` field for `bool`.""" + +BoolFillValue = bool +"""Permitted JSON shape of the `fill_value` field for `bool`: a JSON boolean.""" + + +__all__ = [ + "BOOL_DTYPE_NAME", + "BoolDTypeName", + "BoolFillValue", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/bytes.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/bytes.py new file mode 100644 index 0000000000..1295f22234 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/bytes.py @@ -0,0 +1,48 @@ +""" +Zarr `bytes` data type (variable-length raw bytes, zarr-extensions). 
+ +See https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/bytes +""" + +import re +from typing import Final, Literal, NewType + +BYTES_DTYPE_NAME: Final = "bytes" +"""The `data_type` value for the variable-length `bytes` type.""" + +BytesDTypeName = Literal["bytes"] +"""Literal type of the `data_type` field for `bytes`.""" + +Base64Bytes = NewType("Base64Bytes", str) +"""A standard-alphabet base64-encoded byte sequence.""" + +_BASE64_RE: Final = re.compile(r"^[A-Za-z0-9+/]*={0,2}$") + + +def base64_bytes(value: str) -> Base64Bytes: + """Validate `value` as a Base64Bytes and brand it. + + Raises ValueError if `value` is not standard-alphabet base64 + (length must be a multiple of 4 once padded; only `A-Z`, `a-z`, + `0-9`, `+`, `/`, and trailing `=` padding are permitted). + """ + if len(value) % 4 != 0 or not _BASE64_RE.fullmatch(value): + raise ValueError(f"Expected standard-alphabet base64, got {value!r}") + return Base64Bytes(value) + + +BytesFillValue = tuple[int, ...] | Base64Bytes +"""Permitted JSON shape of the `fill_value` field for `bytes`. + +Either a JSON array of integers in `[0, 255]` (one per byte), or a +`Base64Bytes` string encoding the byte sequence. +""" + + +__all__ = [ + "BYTES_DTYPE_NAME", + "Base64Bytes", + "BytesDTypeName", + "BytesFillValue", + "base64_bytes", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/complex128.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/complex128.py new file mode 100644 index 0000000000..3f5827bde6 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/complex128.py @@ -0,0 +1,37 @@ +""" +Zarr v3 `complex128` data type. 
+ +See https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html +""" + +from typing import Final, Literal + +from zarr_metadata.v3.data_type.float64 import Float64FillValue + +COMPLEX128_DTYPE_NAME: Final = "complex128" +"""The `data_type` value for the `complex128` type.""" + +Complex128DTypeName = Literal["complex128"] +"""Literal type of the `data_type` field for `complex128`.""" + +Complex128Component = Float64FillValue +"""One real or imaginary component of a `complex128` fill value. + +Same shape as a `float64` fill value: a JSON number, a named sentinel, +or a `HexFloat64` string. +""" + +Complex128FillValue = tuple[Complex128Component, Complex128Component] +"""Permitted JSON shape of the `fill_value` field for `complex128`. + +A two-element JSON array `[real, imag]` where each component is a +`Complex128Component`. +""" + + +__all__ = [ + "COMPLEX128_DTYPE_NAME", + "Complex128Component", + "Complex128DTypeName", + "Complex128FillValue", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/complex64.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/complex64.py new file mode 100644 index 0000000000..7409fdd452 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/complex64.py @@ -0,0 +1,37 @@ +""" +Zarr v3 `complex64` data type. + +See https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html +""" + +from typing import Final, Literal + +from zarr_metadata.v3.data_type.float32 import Float32FillValue + +COMPLEX64_DTYPE_NAME: Final = "complex64" +"""The `data_type` value for the `complex64` type.""" + +Complex64DTypeName = Literal["complex64"] +"""Literal type of the `data_type` field for `complex64`.""" + +Complex64Component = Float32FillValue +"""One real or imaginary component of a `complex64` fill value. + +Same shape as a `float32` fill value: a JSON number, a named sentinel, +or a `HexFloat32` string. 
+""" + +Complex64FillValue = tuple[Complex64Component, Complex64Component] +"""Permitted JSON shape of the `fill_value` field for `complex64`. + +A two-element JSON array `[real, imag]` where each component is a +`Complex64Component`. +""" + + +__all__ = [ + "COMPLEX64_DTYPE_NAME", + "Complex64Component", + "Complex64DTypeName", + "Complex64FillValue", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float16.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float16.py new file mode 100644 index 0000000000..dac9eed09b --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float16.py @@ -0,0 +1,53 @@ +""" +Zarr v3 `float16` data type. + +See https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html +""" + +import re +from typing import Final, Literal, NewType + +FLOAT16_DTYPE_NAME: Final = "float16" +"""The `data_type` value for the `float16` type.""" + +Float16DTypeName = Literal["float16"] +"""Literal type of the `data_type` field for `float16`.""" + +Float16SpecialFillValue = Literal["NaN", "Infinity", "-Infinity"] +"""Named non-finite fill values permitted by the spec for IEEE 754 floats.""" + +HexFloat16 = NewType("HexFloat16", str) +"""A 6-character hex string (`0x` + 4 hex digits) encoding the +unsigned-integer representation of a float16.""" + +_HEX_FLOAT16_RE: Final = re.compile(r"^0x[0-9a-fA-F]{4}$") + + +def hex_float16(value: str) -> HexFloat16: + """Validate `value` as a HexFloat16 and brand it. + + Raises ValueError if `value` is not exactly `0x` followed by 4 hex + digits. + """ + if not _HEX_FLOAT16_RE.fullmatch(value): + raise ValueError(f"Expected '0x' followed by 4 hex digits, got {value!r}") + return HexFloat16(value) + + +Float16FillValue = float | int | Float16SpecialFillValue | HexFloat16 +"""Permitted JSON shape of the `fill_value` field for `float16`. 
+ +Either a JSON number, one of the named non-finite sentinels (`"NaN"`, +`"Infinity"`, `"-Infinity"`), or a `HexFloat16` (`0xYYYY` string encoding +the unsigned-integer representation of the IEEE 754 value). +""" + + +__all__ = [ + "FLOAT16_DTYPE_NAME", + "Float16DTypeName", + "Float16FillValue", + "Float16SpecialFillValue", + "HexFloat16", + "hex_float16", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float32.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float32.py new file mode 100644 index 0000000000..37bc19243e --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float32.py @@ -0,0 +1,53 @@ +""" +Zarr v3 `float32` data type. + +See https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html +""" + +import re +from typing import Final, Literal, NewType + +FLOAT32_DTYPE_NAME: Final = "float32" +"""The `data_type` value for the `float32` type.""" + +Float32DTypeName = Literal["float32"] +"""Literal type of the `data_type` field for `float32`.""" + +Float32SpecialFillValue = Literal["NaN", "Infinity", "-Infinity"] +"""Named non-finite fill values permitted by the spec for IEEE 754 floats.""" + +HexFloat32 = NewType("HexFloat32", str) +"""A 10-character hex string (`0x` + 8 hex digits) encoding the +unsigned-integer representation of a float32.""" + +_HEX_FLOAT32_RE: Final = re.compile(r"^0x[0-9a-fA-F]{8}$") + + +def hex_float32(value: str) -> HexFloat32: + """Validate `value` as a HexFloat32 and brand it. + + Raises ValueError if `value` is not exactly `0x` followed by 8 hex + digits. + """ + if not _HEX_FLOAT32_RE.fullmatch(value): + raise ValueError(f"Expected '0x' followed by 8 hex digits, got {value!r}") + return HexFloat32(value) + + +Float32FillValue = float | int | Float32SpecialFillValue | HexFloat32 +"""Permitted JSON shape of the `fill_value` field for `float32`. 
+ +Either a JSON number, one of the named non-finite sentinels (`"NaN"`, +`"Infinity"`, `"-Infinity"`), or a `HexFloat32` (`0xYYYYYYYY` string +encoding the unsigned-integer representation of the IEEE 754 value). +""" + + +__all__ = [ + "FLOAT32_DTYPE_NAME", + "Float32DTypeName", + "Float32FillValue", + "Float32SpecialFillValue", + "HexFloat32", + "hex_float32", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float64.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float64.py new file mode 100644 index 0000000000..28e77568af --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/float64.py @@ -0,0 +1,54 @@ +""" +Zarr v3 `float64` data type. + +See https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html +""" + +import re +from typing import Final, Literal, NewType + +FLOAT64_DTYPE_NAME: Final = "float64" +"""The `data_type` value for the `float64` type.""" + +Float64DTypeName = Literal["float64"] +"""Literal type of the `data_type` field for `float64`.""" + +Float64SpecialFillValue = Literal["NaN", "Infinity", "-Infinity"] +"""Named non-finite fill values permitted by the spec for IEEE 754 floats.""" + +HexFloat64 = NewType("HexFloat64", str) +"""An 18-character hex string (`0x` + 16 hex digits) encoding the +unsigned-integer representation of a float64.""" + +_HEX_FLOAT64_RE: Final = re.compile(r"^0x[0-9a-fA-F]{16}$") + + +def hex_float64(value: str) -> HexFloat64: + """Validate `value` as a HexFloat64 and brand it. + + Raises ValueError if `value` is not exactly `0x` followed by 16 hex + digits. + """ + if not _HEX_FLOAT64_RE.fullmatch(value): + raise ValueError(f"Expected '0x' followed by 16 hex digits, got {value!r}") + return HexFloat64(value) + + +Float64FillValue = float | int | Float64SpecialFillValue | HexFloat64 +"""Permitted JSON shape of the `fill_value` field for `float64`. 
# Spec-defined pieces of the Zarr v3 `int16` data type. Nothing here is
# validated at runtime: the module holds one string constant and two aliases.

# Runtime constant carrying the dtype name; `Final` marks it as not to be
# rebound.
INT16_DTYPE_NAME: Final = "int16"
"""The `data_type` value for the `int16` type."""

# Type-level counterpart of the constant, for annotating a `data_type` field.
Int16DTypeName = Literal["int16"]
"""Literal type of the `data_type` field for `int16`."""

# Plain alias of `int`: the documented [-32768, 32767] bound is not expressed
# by the type, so out-of-range integers still type-check.
Int16FillValue = int
"""Permitted JSON shape of the `fill_value` field for `int16`: a JSON integer in [-32768, 32767]."""


# Names exported by `from ... import *`.
__all__ = [
    "INT16_DTYPE_NAME",
    "Int16DTypeName",
    "Int16FillValue",
]
# Spec-defined pieces of the Zarr v3 `int64` data type. Nothing here is
# validated at runtime: the module holds one string constant and two aliases.

# Runtime constant carrying the dtype name; `Final` marks it as not to be
# rebound.
INT64_DTYPE_NAME: Final = "int64"
"""The `data_type` value for the `int64` type."""

# Type-level counterpart of the constant, for annotating a `data_type` field.
Int64DTypeName = Literal["int64"]
"""Literal type of the `data_type` field for `int64`."""

# Plain alias of `int`: the documented [-2**63, 2**63 - 1] bound is not
# expressed by the type, so out-of-range integers still type-check.
Int64FillValue = int
"""Permitted JSON shape of the `fill_value` field for `int64`: a JSON integer in [-2**63, 2**63 - 1]."""


# Names exported by `from ... import *`.
__all__ = [
    "INT64_DTYPE_NAME",
    "Int64DTypeName",
    "Int64FillValue",
]
# Runtime constant carrying the extension dtype's `name`; `Final` marks it as
# not to be rebound.
NUMPY_DATETIME64_DTYPE_NAME: Final = "numpy.datetime64"
"""The `name` field value of the `numpy.datetime64` data type."""

# Type-level counterpart of the constant, used by `NumpyDatetime64.name`.
NumpyDatetime64DTypeName = Literal["numpy.datetime64"]
"""Literal type of the `name` field of the `numpy.datetime64` data type."""

# Closed set of accepted unit strings. Microseconds appear twice, as the ASCII
# spelling "us" and as "μs"; "generic" is also a member.
DateTimeUnit = Literal[
    "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic"
]
"""Time unit codes used by numpy.datetime64."""


class NumpyDatetime64Configuration(TypedDict):
    """
    Configuration for the `numpy.datetime64` data type.

    Both keys are required and declared `ReadOnly`, so type checkers reject
    assignment to them through this type.

    Attributes
    ----------
    unit
        A string encoding a unit of time; one of the `DateTimeUnit` codes.
    scale_factor
        The multiplier relative to the unit.
    """

    unit: ReadOnly[DateTimeUnit]
    scale_factor: ReadOnly[int]


class NumpyDatetime64(TypedDict):
    """
    `numpy.datetime64` data type metadata.

    Attributes
    ----------
    name
        Always the literal string `"numpy.datetime64"`.
    configuration
        The unit and scale factor that parameterise the type.
    """

    # NOTE(review): unlike the configuration keys, these two are not marked
    # `ReadOnly` -- confirm that asymmetry is intended.
    name: NumpyDatetime64DTypeName
    configuration: NumpyDatetime64Configuration


# The union is type-level only: no range check is applied to the integer form.
NumpyDatetime64FillValue = int | Literal["NaT"]
"""Permitted JSON shape of the `fill_value` field for `numpy.datetime64`.

Either a JSON integer (count of `unit * scale_factor` since the epoch),
or the string `"NaT"` (equivalent to the integer `-2**63`).
"""


# Names exported by `from ... import *`.
__all__ = [
    "NUMPY_DATETIME64_DTYPE_NAME",
    "DateTimeUnit",
    "NumpyDatetime64",
    "NumpyDatetime64Configuration",
    "NumpyDatetime64DTypeName",
    "NumpyDatetime64FillValue",
]
RawBytesDTypeName = NewType("RawBytesDTypeName", str)
"""A spec-conformant `r<N>` raw-bytes name (e.g. `"r8"`, `"r16"`)."""

# ASCII digits only. `\d` in a `str` pattern also matches non-ASCII Unicode
# decimal digits (Arabic-Indic, full-width, ...), which `int()` happily
# parses, so names such as "r٨" would otherwise be accepted as valid.
_RAW_BYTES_RE: Final = re.compile(r"^r([0-9]+)$")


def raw_bytes_dtype_name(value: str) -> RawBytesDTypeName:
    """
    Validate `value` as an `r<N>` raw-bytes name and brand it.

    Parameters
    ----------
    value
        Candidate data type name: a lowercase `r` followed by ASCII decimal
        digits giving the bit count `N`.

    Returns
    -------
    RawBytesDTypeName
        The same string, typed as `RawBytesDTypeName`.

    Raises
    ------
    ValueError
        If `value` is not `r` followed by ASCII digits, or if the bit count
        is zero or not a multiple of 8.
    """
    match = _RAW_BYTES_RE.fullmatch(value)
    if match is None:
        raise ValueError(f"Expected 'r' followed by a positive integer, got {value!r}")
    bits = int(match.group(1))
    # Raw types are whole bytes: reject r0 and any width that is not a
    # multiple of 8.
    if bits == 0 or bits % 8 != 0:
        raise ValueError(f"Expected 'r<N>' where N is a positive multiple of 8, got {value!r}")
    return RawBytesDTypeName(value)
# Runtime constant carrying the extension dtype's `name`; `Final` marks it as
# not to be rebound.
STRUCT_DTYPE_NAME: Final = "struct"
"""The `name` field value of the `struct` data type."""

# Type-level counterpart of the constant, used by `Struct.name`.
StructDTypeName = Literal["struct"]
"""Literal type of the `name` field of the `struct` data type."""


class StructField(TypedDict):
    """
    A single field entry inside a structured dtype.

    Both keys are required and declared `ReadOnly`. The uniqueness and
    non-emptiness of `name` are documented constraints only; nothing in this
    type enforces them.

    Attributes
    ----------
    name
        The field name (must be unique within a struct and non-empty).
    data_type
        The field's data type. Recursive: may be a bare-string primitive
        or a named-config envelope including another `struct`.
    """

    name: ReadOnly[str]
    data_type: ReadOnly[DType]


class StructConfiguration(TypedDict):
    """
    Configuration for the `struct` data type.

    Attributes
    ----------
    fields
        The struct's fields as a tuple of `StructField` entries.
    """

    fields: ReadOnly[tuple[StructField, ...]]


class Struct(TypedDict):
    """
    `struct` data type metadata.

    Attributes
    ----------
    name
        Always the literal string `"struct"`.
    configuration
        The field list that parameterises the type.
    """

    # NOTE(review): unlike the configuration keys, these two are not marked
    # `ReadOnly` -- confirm that asymmetry is intended.
    name: StructDTypeName
    configuration: StructConfiguration


# Values are typed as generic `JSON`: the per-field shape described in the
# docstring below is not expressed by this alias.
StructFillValue = Mapping[str, JSON]
"""Permitted JSON shape of the `fill_value` field for `struct`.

A JSON object mapping each field name to that field's fill value. Field
fill values are themselves shaped per the field's `data_type`, recursively.
"""


# Names exported by `from ... import *`.
__all__ = [
    "STRUCT_DTYPE_NAME",
    "Struct",
    "StructConfiguration",
    "StructDTypeName",
    "StructField",
    "StructFillValue",
]
# Spec-defined pieces of the Zarr v3 `uint32` data type. Nothing here is
# validated at runtime: the module holds one string constant and two aliases.

# Runtime constant carrying the dtype name; `Final` marks it as not to be
# rebound.
UINT32_DTYPE_NAME: Final = "uint32"
"""The `data_type` value for the `uint32` type."""

# Type-level counterpart of the constant, for annotating a `data_type` field.
Uint32DTypeName = Literal["uint32"]
"""Literal type of the `data_type` field for `uint32`."""

# Plain alias of `int`: the documented [0, 2**32 - 1] bound is not expressed
# by the type, so negative or oversized integers still type-check.
Uint32FillValue = int
"""Permitted JSON shape of the `fill_value` field for `uint32`: a JSON integer in [0, 2**32 - 1]."""


# Names exported by `from ... import *`.
__all__ = [
    "UINT32_DTYPE_NAME",
    "Uint32DTypeName",
    "Uint32FillValue",
]
# `extra_items` is a keyword of the `typing_extensions` TypedDict used by this
# module; the `type: ignore` is presumably there because the type checker in
# use does not accept that keyword yet -- TODO confirm it is still needed.
#
# NOTE(review): this module uses `from __future__ import annotations` and
# imports `Mapping` / `JSON` only under `TYPE_CHECKING`, so the field
# annotations below stay unevaluated strings at runtime. Runtime introspection
# (`typing.get_type_hints`, `__required_keys__` / `__optional_keys__`) may
# therefore fail or report `attributes` as required -- confirm nothing relies
# on it.
class GroupMetadataV3(TypedDict, extra_items=AllowedExtraField):  # type: ignore[call-arg]
    """
    Zarr v3 group metadata document (the `zarr.json` content for a group).

    Extra keys are permitted if they conform to `AllowedExtraField`.

    Attributes
    ----------
    zarr_format
        Always the integer `3`.
    node_type
        Always the string `"group"`.
    attributes
        Optional mapping from string keys to JSON values; the key may be
        omitted entirely.
    """

    zarr_format: Literal[3]
    node_type: Literal["group"]
    attributes: NotRequired[Mapping[str, JSON]]
def test_hex_float16_validator() -> None:
    """`hex_float16` returns a well-formed value unchanged and rejects malformed ones."""
    from zarr_metadata.v3.data_type.float16 import hex_float16

    assert hex_float16("0x7c00") == "0x7c00"
    with pytest.raises(ValueError):
        hex_float16("0x7c")  # too short
    with pytest.raises(ValueError):
        hex_float16("0X7C00")  # uppercase 0X prefix not accepted
    with pytest.raises(ValueError):
        hex_float16("not hex")


def test_hex_float32_validator() -> None:
    """`hex_float32` returns a well-formed value unchanged and rejects malformed ones."""
    from zarr_metadata.v3.data_type.float32 import hex_float32

    assert hex_float32("0x7fc00000") == "0x7fc00000"
    with pytest.raises(ValueError):
        hex_float32("0x7fc0")  # too short
    with pytest.raises(ValueError):
        hex_float32("0x7fc000000")  # too long (9 digits)
    with pytest.raises(ValueError):
        hex_float32("0X7FC00000")  # uppercase 0X prefix not accepted
    with pytest.raises(ValueError):
        hex_float32("not hex")


def test_hex_float64_validator() -> None:
    """`hex_float64` returns a well-formed value unchanged and rejects malformed ones."""
    from zarr_metadata.v3.data_type.float64 import hex_float64

    assert hex_float64("0x7ff8000000000000") == "0x7ff8000000000000"
    with pytest.raises(ValueError):
        hex_float64("0x7ff8")  # too short
    with pytest.raises(ValueError):
        hex_float64("0x7ff80000000000000")  # too long (17 digits)
    with pytest.raises(ValueError):
        hex_float64("0X7FF8000000000000")  # uppercase 0X prefix not accepted
    with pytest.raises(ValueError):
        hex_float64("not hex")


def test_base64_bytes_validator() -> None:
    """`base64_bytes` returns valid base64 text unchanged and rejects malformed text."""
    from zarr_metadata.v3.data_type.bytes import base64_bytes

    assert base64_bytes("SGVsbG8=") == "SGVsbG8="
    assert base64_bytes("") == ""
    assert base64_bytes("AAAA") == "AAAA"

    with pytest.raises(ValueError):
        base64_bytes("not!base64")
    with pytest.raises(ValueError):
        base64_bytes("ABC")  # length not a multiple of 4


def test_raw_bytes_dtype_name_validator() -> None:
    """`raw_bytes_dtype_name` accepts `r` + a positive multiple of 8 and rejects other names."""
    from zarr_metadata.v3.data_type.raw import raw_bytes_dtype_name

    assert raw_bytes_dtype_name("r8") == "r8"
    assert raw_bytes_dtype_name("r16") == "r16"
    assert raw_bytes_dtype_name("r256") == "r256"

    with pytest.raises(ValueError):
        raw_bytes_dtype_name("r3")  # not a multiple of 8
    with pytest.raises(ValueError):
        raw_bytes_dtype_name("r0")  # zero not allowed
    with pytest.raises(ValueError):
        raw_bytes_dtype_name("R8")  # uppercase R not accepted
    with pytest.raises(ValueError):
        raw_bytes_dtype_name("8")  # missing prefix