From 35537c5225fccc17cee08e8e44107de7fd082fc9 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 6 Apr 2026 18:04:12 +0200 Subject: [PATCH] refactor: define codec and data type classes upstream in a subpackage --- packages/zarr-interfaces/README.md | 17 ++ packages/zarr-interfaces/pyproject.toml | 17 ++ .../src/zarr_interfaces/__init__.py | 0 .../src/zarr_interfaces/codec/__init__.py | 0 .../src/zarr_interfaces/codec/v1.py | 202 +++++++++++++++++ .../src/zarr_interfaces/data_type/__init__.py | 0 .../src/zarr_interfaces/data_type/v1.py | 121 ++++++++++ .../src/zarr_interfaces/metadata/__init__.py | 0 .../src/zarr_interfaces/metadata/v1.py | 25 +++ pyproject.toml | 4 + src/zarr/abc/codec.py | 50 +++-- src/zarr/abc/metadata.py | 10 +- src/zarr/core/dtype/wrapper.py | 11 +- tests/test_zarr_interfaces.py | 210 ++++++++++++++++++ 14 files changed, 640 insertions(+), 27 deletions(-) create mode 100644 packages/zarr-interfaces/README.md create mode 100644 packages/zarr-interfaces/pyproject.toml create mode 100644 packages/zarr-interfaces/src/zarr_interfaces/__init__.py create mode 100644 packages/zarr-interfaces/src/zarr_interfaces/codec/__init__.py create mode 100644 packages/zarr-interfaces/src/zarr_interfaces/codec/v1.py create mode 100644 packages/zarr-interfaces/src/zarr_interfaces/data_type/__init__.py create mode 100644 packages/zarr-interfaces/src/zarr_interfaces/data_type/v1.py create mode 100644 packages/zarr-interfaces/src/zarr_interfaces/metadata/__init__.py create mode 100644 packages/zarr-interfaces/src/zarr_interfaces/metadata/v1.py create mode 100644 tests/test_zarr_interfaces.py diff --git a/packages/zarr-interfaces/README.md b/packages/zarr-interfaces/README.md new file mode 100644 index 0000000000..6bd6c79e5e --- /dev/null +++ b/packages/zarr-interfaces/README.md @@ -0,0 +1,17 @@ +# zarr-interfaces + +Interface definitions (ABCs and protocols) for zarr codecs and data types. 
+
+This package provides the abstract base classes and protocols that external
+codec and data type implementations should subclass or implement. It has
+minimal dependencies (only numpy) and does not depend on zarr-python itself.
+
+## Usage
+
+```python
+from zarr_interfaces.codec.v1 import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
+from zarr_interfaces.data_type.v1 import ZDType
+```
+
+Interfaces are versioned under a `v1` namespace to support future evolution
+without breaking existing implementations.
diff --git a/packages/zarr-interfaces/pyproject.toml b/packages/zarr-interfaces/pyproject.toml
new file mode 100644
index 0000000000..8e12fec3b8
--- /dev/null
+++ b/packages/zarr-interfaces/pyproject.toml
@@ -0,0 +1,17 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "zarr-interfaces"
+version = "0.1.0"
+description = "Interface definitions (ABCs and protocols) for zarr codecs and data types"
+readme = "README.md"
+license = "BSD-3-Clause"
+requires-python = ">=3.12"
+dependencies = [
+    "numpy>=2",
+]
+
+[project.urls]
+homepage = "https://github.com/zarr-developers/zarr-python"
diff --git a/packages/zarr-interfaces/src/zarr_interfaces/__init__.py b/packages/zarr-interfaces/src/zarr_interfaces/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/packages/zarr-interfaces/src/zarr_interfaces/codec/__init__.py b/packages/zarr-interfaces/src/zarr_interfaces/codec/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/packages/zarr-interfaces/src/zarr_interfaces/codec/v1.py b/packages/zarr-interfaces/src/zarr_interfaces/codec/v1.py
new file mode 100644
index 0000000000..cda3a71d0a
--- /dev/null
+++ b/packages/zarr-interfaces/src/zarr_interfaces/codec/v1.py
@@ -0,0 +1,204 @@
+"""Codec interface definitions (v1).
+
+This module defines the abstract interfaces for zarr codecs.
+External codec implementations should subclass ``ArrayArrayCodec``,
+``ArrayBytesCodec``, or ``BytesBytesCodec`` from this module.
+
+The ``Buffer`` and ``NDBuffer`` types here are protocols — they define
+the structural interface that zarr's concrete buffer types implement.
+Codec authors should type against these protocols, not zarr's concrete
+buffer classes.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, ClassVar, Protocol, Self, runtime_checkable
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    import numpy as np
+    import numpy.typing as npt
+
+    from zarr_interfaces.data_type.v1 import JSON, TBaseDType, TBaseScalar, ZDType
+
+
+# ---------------------------------------------------------------------------
+# Buffer protocols
+# ---------------------------------------------------------------------------
+
+
+class Buffer(Protocol):
+    """Protocol for a flat contiguous memory block (bytes-like)."""
+
+    def __len__(self) -> int: ...
+    def __getitem__(self, key: slice) -> Buffer: ...
+
+
+class NDBuffer(Protocol):
+    """Protocol for an N-dimensional array buffer."""
+
+    @property
+    def dtype(self) -> np.dtype[np.generic]: ...
+
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+
+    def as_ndarray_like(self) -> npt.NDArray[np.generic]: ...
+
+    @classmethod
+    def from_ndarray_like(cls, data: npt.NDArray[np.generic]) -> NDBuffer: ...
+
+    def transpose(self, axes: tuple[int, ...]) -> NDBuffer: ...
+
+    def __getitem__(self, key: object) -> NDBuffer: ...
+
+    def __setitem__(self, key: object, value: object) -> None: ...
+
+
+# ---------------------------------------------------------------------------
+# ArraySpec protocol
+# ---------------------------------------------------------------------------
+
+
+class ArraySpec(Protocol):
+    """Protocol for the specification of a chunk's metadata."""
+
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+ + @property + def dtype(self) -> ZDType[TBaseDType, TBaseScalar]: ... + + @property + def fill_value(self) -> object: ... + + @property + def ndim(self) -> int: ... + + +# --------------------------------------------------------------------------- +# Codec input/output type aliases +# --------------------------------------------------------------------------- + +type CodecInput = NDBuffer | Buffer +type CodecOutput = NDBuffer | Buffer + + +# --------------------------------------------------------------------------- +# Sync codec protocol +# --------------------------------------------------------------------------- + + +@runtime_checkable +class SupportsSyncCodec[CI: CodecInput, CO: CodecOutput](Protocol): + """Protocol for codecs that support synchronous encode/decode. + + The type parameters mirror ``BaseCodec``: ``CI`` is the decoded type + and ``CO`` is the encoded type. + """ + + def _decode_sync(self, chunk_data: CO, chunk_spec: ArraySpec) -> CI: ... + + def _encode_sync(self, chunk_data: CI, chunk_spec: ArraySpec) -> CO | None: ... + + +# --------------------------------------------------------------------------- +# Codec ABCs +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class BaseCodec[CI: CodecInput, CO: CodecOutput](ABC): + """Generic base class for codecs. + + Subclass ``ArrayArrayCodec``, ``ArrayBytesCodec``, or + ``BytesBytesCodec`` instead of this class directly. + """ + + is_fixed_size: ClassVar[bool] + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + """Create an instance from a JSON dictionary.""" + return cls(**data) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + """Serialize this codec to a JSON dictionary.""" + raise NotImplementedError + + @abstractmethod + def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: + """Return the encoded byte length for a given input byte length.""" + ... 
+ + def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: + """Return the chunk spec after encoding by this codec. + + Override this for codecs that change shape, dtype, or fill value. + """ + return chunk_spec + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + """Fill in codec parameters that can be inferred from array metadata.""" + return self + + def validate( + self, + *, + shape: tuple[int, ...], + dtype: ZDType[TBaseDType, TBaseScalar], + chunk_grid: object, + ) -> None: + """Validate that this codec is compatible with the array metadata. + + The default implementation does nothing. Override to add checks. + """ + + async def _decode_single(self, chunk_data: CO, chunk_spec: ArraySpec) -> CI: + """Decode a single chunk. Override this or ``_decode_sync``.""" + raise NotImplementedError + + async def decode( + self, + chunks_and_specs: Iterable[tuple[CO | None, ArraySpec]], + ) -> Iterable[CI | None]: + """Decode a batch of chunks.""" + results: list[CI | None] = [] + for chunk_data, chunk_spec in chunks_and_specs: + if chunk_data is not None: + results.append(await self._decode_single(chunk_data, chunk_spec)) + else: + results.append(None) + return results + + async def _encode_single(self, chunk_data: CI, chunk_spec: ArraySpec) -> CO | None: + """Encode a single chunk. Override this or ``_encode_sync``.""" + raise NotImplementedError + + async def encode( + self, + chunks_and_specs: Iterable[tuple[CI | None, ArraySpec]], + ) -> Iterable[CO | None]: + """Encode a batch of chunks.""" + results: list[CO | None] = [] + for chunk_data, chunk_spec in chunks_and_specs: + if chunk_data is not None: + results.append(await self._encode_single(chunk_data, chunk_spec)) + else: + results.append(None) + return results + + +class ArrayArrayCodec(BaseCodec[NDBuffer, NDBuffer]): + """Base class for array-to-array codecs (e.g. 
transpose, scale_offset).""" + + +class ArrayBytesCodec(BaseCodec[NDBuffer, Buffer]): + """Base class for array-to-bytes codecs (e.g. bytes, sharding).""" + + +class BytesBytesCodec(BaseCodec[Buffer, Buffer]): + """Base class for bytes-to-bytes codecs (e.g. gzip, zstd).""" diff --git a/packages/zarr-interfaces/src/zarr_interfaces/data_type/__init__.py b/packages/zarr-interfaces/src/zarr_interfaces/data_type/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/packages/zarr-interfaces/src/zarr_interfaces/data_type/v1.py b/packages/zarr-interfaces/src/zarr_interfaces/data_type/v1.py new file mode 100644 index 0000000000..2f75a95892 --- /dev/null +++ b/packages/zarr-interfaces/src/zarr_interfaces/data_type/v1.py @@ -0,0 +1,121 @@ +"""Data type interface definitions (v1). + +This module defines the abstract interface for zarr data types. +External data type implementations should subclass ``ZDType`` from this +module. The interface is intentionally minimal and stable. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import ( + ClassVar, + Literal, + Self, + TypeGuard, + overload, +) + +import numpy as np + +# JSON-like type for serialization +type JSON = str | int | float | bool | dict[str, JSON] | list[JSON] | None +type ZarrFormat = Literal[2, 3] + +# Bounds for the scalar and dtype type parameters +type TBaseScalar = np.generic | str | bytes +type TBaseDType = np.dtype[np.generic] + +# JSON representations of data types +type DTypeJSON = JSON +type DTypeSpec_V2 = str | list[tuple[str, DTypeJSON]] +type DTypeSpec_V3 = str | dict[str, JSON] + + +@dataclass(frozen=True, kw_only=True, slots=True) +class ZDType[DType: TBaseDType, Scalar: TBaseScalar](ABC): + """Abstract base class for wrapping native array data types. + + Subclasses must implement all abstract methods to support serialization, + deserialization, and scalar handling for their native data type. 
+ + Type Parameters + --------------- + DType + The native data type (e.g. ``np.dtype[np.float64]``). + Scalar + The scalar type produced by this data type (e.g. ``np.float64``). + """ + + dtype_cls: ClassVar[type[TBaseDType]] + _zarr_v3_name: ClassVar[str] + + @classmethod + def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[DType]: + """Check that a native data type matches ``dtype_cls``.""" + return type(dtype) is cls.dtype_cls + + @classmethod + @abstractmethod + def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: + """Create an instance from a native data type.""" + ... + + @abstractmethod + def to_native_dtype(self: Self) -> DType: + """Return the native data type wrapped by this instance.""" + ... + + @classmethod + @abstractmethod + def _from_json_v2(cls: type[Self], data: DTypeJSON) -> Self: ... + + @classmethod + @abstractmethod + def _from_json_v3(cls: type[Self], data: DTypeJSON) -> Self: ... + + @classmethod + def from_json(cls: type[Self], data: DTypeJSON, *, zarr_format: ZarrFormat) -> Self: + """Create an instance from JSON metadata.""" + if zarr_format == 2: + return cls._from_json_v2(data) + if zarr_format == 3: + return cls._from_json_v3(data) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @overload + def to_json(self, zarr_format: Literal[2]) -> DTypeSpec_V2: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> DTypeSpec_V3: ... + + @abstractmethod + def to_json(self, zarr_format: ZarrFormat) -> DTypeSpec_V2 | DTypeSpec_V3: + """Serialize this data type to JSON.""" + ... + + @abstractmethod + def _check_scalar(self, data: object) -> bool: + """Check that a python object is a valid scalar for this data type.""" + ... + + @abstractmethod + def cast_scalar(self, data: object) -> Scalar: + """Cast a python object to the scalar type of this data type.""" + ... 
+ + @abstractmethod + def default_scalar(self) -> Scalar: + """Return the default scalar value for this data type.""" + ... + + @abstractmethod + def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> Scalar: + """Deserialize a JSON value to a scalar.""" + ... + + @abstractmethod + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: + """Serialize a scalar value to JSON.""" + ... diff --git a/packages/zarr-interfaces/src/zarr_interfaces/metadata/__init__.py b/packages/zarr-interfaces/src/zarr_interfaces/metadata/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/packages/zarr-interfaces/src/zarr_interfaces/metadata/v1.py b/packages/zarr-interfaces/src/zarr_interfaces/metadata/v1.py new file mode 100644 index 0000000000..ffd403e8ef --- /dev/null +++ b/packages/zarr-interfaces/src/zarr_interfaces/metadata/v1.py @@ -0,0 +1,25 @@ +"""Metadata protocol (v1). + +Defines the structural interface for objects that can be serialized +to and deserialized from JSON dictionaries. +""" + +from __future__ import annotations + +from typing import Protocol, Self, runtime_checkable + +type JSON = str | int | float | bool | dict[str, JSON] | list[JSON] | None + + +@runtime_checkable +class Metadata(Protocol): + """Protocol for objects that round-trip through JSON dictionaries.""" + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + """Create an instance from a JSON dictionary.""" + ... + + def to_dict(self) -> dict[str, JSON]: + """Serialize to a JSON dictionary.""" + ... 
diff --git a/pyproject.toml b/pyproject.toml
index 96932a9611..2357dfdb0e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
     'google-crc32c>=1.5',
     'typing_extensions>=4.12',
     'donfig>=0.8',
+    'zarr-interfaces>=0.1.0',
 ]
 
 dynamic = [
@@ -461,6 +462,12 @@ title_format = "## {version} ({project_date})"
 issue_format = "[#{issue}](https://github.com/zarr-developers/zarr-python/issues/{issue})"
 start_string = "\n"
 
+[tool.uv.workspace]
+members = ["packages/*"]
+
+[tool.uv.sources]
+zarr-interfaces = { workspace = true }
+
 [tool.codespell]
 ignore-words-list = "astroid"
 
diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
index 79c0dcf72e..139b63f653 100644
--- a/src/zarr/abc/codec.py
+++ b/src/zarr/abc/codec.py
@@ -2,11 +2,15 @@
 
 from abc import abstractmethod
 from collections.abc import Mapping
-from typing import TYPE_CHECKING, Literal, Protocol, TypeGuard, runtime_checkable
+from typing import TYPE_CHECKING, Literal, TypeGuard
 
 from typing_extensions import ReadOnly, TypedDict
+from zarr_interfaces.codec.v1 import ArrayArrayCodec as _ArrayArrayCodecInterface
+from zarr_interfaces.codec.v1 import ArrayBytesCodec as _ArrayBytesCodecInterface
+from zarr_interfaces.codec.v1 import BaseCodec as _BaseCodecInterface
+from zarr_interfaces.codec.v1 import BytesBytesCodec as _BytesBytesCodecInterface
+from zarr_interfaces.codec.v1 import SupportsSyncCodec
 
-from zarr.abc.metadata import Metadata
 from zarr.core.buffer import Buffer, NDBuffer
 from zarr.core.common import NamedConfig, concurrent_map
 from zarr.core.config import config
@@ -66,23 +70,7 @@ def _check_codecjson_v2(data: object) -> TypeGuard[CodecJSON_V2[str]]:
 """The widest type of JSON-like input that could specify a codec."""
 
 
-@runtime_checkable
-class SupportsSyncCodec[CI: CodecInput, CO: CodecOutput](Protocol):
-    """Protocol for codecs that support synchronous encode/decode.
-
-    Codecs implementing this protocol provide `_decode_sync` and `_encode_sync`
-    methods that perform encoding/decoding without requiring an async event loop.
-
-    The type parameters mirror `BaseCodec`: `CI` is the decoded type and `CO` is
-    the encoded type.
-    """
-
-    def _decode_sync(self, chunk_data: CO, chunk_spec: ArraySpec) -> CI: ...
-
-    def _encode_sync(self, chunk_data: CI, chunk_spec: ArraySpec) -> CO | None: ...
-
-
-class BaseCodec[CI: CodecInput, CO: CodecOutput](Metadata):
+class BaseCodec[CI: CodecInput, CO: CodecOutput](_BaseCodecInterface[CI, CO]):
     """Generic base class for codecs.
 
     Codecs can be registered via zarr.codecs.registry.
@@ -203,17 +191,35 @@ async def encode(
         return await _batching_helper(self._encode_single, chunks_and_specs)
 
 
-class ArrayArrayCodec(BaseCodec[NDBuffer, NDBuffer]):
+class ArrayArrayCodec(_ArrayArrayCodecInterface, BaseCodec[NDBuffer, NDBuffer]):
     """Base class for array-to-array codecs."""
 
+    @classmethod
+    def __subclasshook__(cls, C: type) -> bool:
+        if cls is ArrayArrayCodec and _ArrayArrayCodecInterface in C.__mro__:
+            return True
+        return NotImplemented  # defer to ABCMeta: real subclasses and register() still work
+
 
-class ArrayBytesCodec(BaseCodec[NDBuffer, Buffer]):
+class ArrayBytesCodec(_ArrayBytesCodecInterface, BaseCodec[NDBuffer, Buffer]):
     """Base class for array-to-bytes codecs."""
 
+    @classmethod
+    def __subclasshook__(cls, C: type) -> bool:
+        if cls is ArrayBytesCodec and _ArrayBytesCodecInterface in C.__mro__:
+            return True
+        return NotImplemented  # defer to ABCMeta: real subclasses and register() still work
+
 
-class BytesBytesCodec(BaseCodec[Buffer, Buffer]):
+class BytesBytesCodec(_BytesBytesCodecInterface, BaseCodec[Buffer, Buffer]):
     """Base class for bytes-to-bytes codecs."""
 
+    @classmethod
+    def __subclasshook__(cls, C: type) -> bool:
+        if cls is BytesBytesCodec and _BytesBytesCodecInterface in C.__mro__:
+            return True
+        return NotImplemented  # defer to ABCMeta: real subclasses and register() still work
+
 
 Codec = ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec
 
diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py
index a56f986645..11bc66b9af 100644
--- a/src/zarr/abc/metadata.py
+++ b/src/zarr/abc/metadata.py
@@ -3,6 +3,8 @@
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
+from zarr_interfaces.metadata.v1 import Metadata as MetadataProtocol
+
 if TYPE_CHECKING:
     from typing import Self
 
@@ -26,12 +28,14 @@ def to_dict(self) -> dict[str, JSON]:
         for field in fields(self):
             key = field.name
             value = getattr(self, key)
-            if isinstance(value, Metadata):
-                out_dict[field.name] = getattr(self, field.name).to_dict()
+            if isinstance(value, MetadataProtocol):
+                out_dict[key] = value.to_dict()
             elif isinstance(value, str):
                 out_dict[key] = value
             elif isinstance(value, Sequence):
-                out_dict[key] = tuple(v.to_dict() if isinstance(v, Metadata) else v for v in value)
+                out_dict[key] = tuple(
+                    v.to_dict() if isinstance(v, MetadataProtocol) else v for v in value
+                )
             else:
                 out_dict[key] = value
 
diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py
index 42d5d88473..a32e9f98c7 100644
--- a/src/zarr/core/dtype/wrapper.py
+++ b/src/zarr/core/dtype/wrapper.py
@@ -23,7 +23,7 @@
 
 from __future__ import annotations
 
-from abc import ABC, abstractmethod
+from abc import abstractmethod
 from dataclasses import dataclass
 from typing import (
     TYPE_CHECKING,
@@ -35,6 +35,7 @@
 )
 
 import numpy as np
+from zarr_interfaces.data_type.v1 import ZDType as _ZDTypeBase
 
 if TYPE_CHECKING:
     from zarr.core.common import JSON, ZarrFormat
@@ -49,7 +50,7 @@
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
-class ZDType[DType: TBaseDType, Scalar: TBaseScalar](ABC):
+class ZDType[DType: TBaseDType, Scalar: TBaseScalar](_ZDTypeBase[DType, Scalar]):
     """
     Abstract base class for wrapping native array data types, e.g. numpy dtypes
 
@@ -66,6 +67,12 @@ class variable, and it should generally be unique across different data types.
dtype_cls: ClassVar[type[TBaseDType]] _zarr_v3_name: ClassVar[str] + @classmethod + def __subclasshook__(cls, C: type) -> bool: + if cls is ZDType: + return _ZDTypeBase in C.__mro__ + return NotImplemented + @classmethod def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[DType]: """ diff --git a/tests/test_zarr_interfaces.py b/tests/test_zarr_interfaces.py new file mode 100644 index 0000000000..1679c465f2 --- /dev/null +++ b/tests/test_zarr_interfaces.py @@ -0,0 +1,210 @@ +"""Tests that externally-defined codecs and data types (subclassing +zarr_interfaces, not zarr) are recognized by zarr's internal machinery. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, ClassVar, Literal, Self + +import numpy as np +from zarr_interfaces.codec.v1 import ArrayArrayCodec as IArrayArrayCodec +from zarr_interfaces.codec.v1 import ArrayBytesCodec as IArrayBytesCodec +from zarr_interfaces.codec.v1 import BytesBytesCodec as IBytesBytes +from zarr_interfaces.data_type.v1 import ZDType as IZDType +from zarr_interfaces.metadata.v1 import Metadata as IMetadata + +# --------------------------------------------------------------------------- +# Verify zarr's classes satisfy the interfaces +# --------------------------------------------------------------------------- + + +class TestZarrClassesSatisfyInterfaces: + def test_array_array_codec(self) -> None: + """zarr's ArrayArrayCodec is a subclass of the interface.""" + from zarr.abc.codec import ArrayArrayCodec + + assert issubclass(ArrayArrayCodec, IArrayArrayCodec) + + def test_array_bytes_codec(self) -> None: + """zarr's ArrayBytesCodec is a subclass of the interface.""" + from zarr.abc.codec import ArrayBytesCodec + + assert issubclass(ArrayBytesCodec, IArrayBytesCodec) + + def test_bytes_bytes_codec(self) -> None: + """zarr's BytesBytesCodec is a subclass of the interface.""" + from zarr.abc.codec import BytesBytesCodec + + assert issubclass(BytesBytesCodec, 
IBytesBytes) + + def test_zdtype(self) -> None: + """zarr's ZDType is a subclass of the interface.""" + from zarr.core.dtype.wrapper import ZDType + + assert issubclass(ZDType, IZDType) + + def test_concrete_dtype_is_interface_instance(self) -> None: + """A concrete zarr dtype is an instance of the interface ZDType.""" + from zarr.core.dtype.npy.float import Float64 + + assert isinstance(Float64(), IZDType) + + def test_metadata_protocol(self) -> None: + """zarr's Metadata class satisfies the Metadata protocol.""" + from zarr.abc.metadata import Metadata + + assert isinstance(Metadata, type) + # Metadata instances should satisfy the protocol + from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding + + enc = DefaultChunkKeyEncoding(separator="/") + assert isinstance(enc, IMetadata) + + def test_concrete_codec_is_interface_instance(self) -> None: + """A concrete zarr codec is an instance of the interface ABC.""" + from zarr.codecs.bytes import BytesCodec + + assert isinstance(BytesCodec(), IArrayBytesCodec) + + def test_concrete_bb_codec_is_interface_instance(self) -> None: + """A concrete zarr BytesBytesCodec is an instance of the interface ABC.""" + from zarr.codecs.gzip import GzipCodec + + assert isinstance(GzipCodec(), IBytesBytes) + + +# --------------------------------------------------------------------------- +# External codec defined using only zarr_interfaces +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class ExternalIdentityCodec(IArrayArrayCodec): + """An array-to-array codec defined using only zarr_interfaces. + Simulates what a third-party package would do. 
+ """ + + is_fixed_size: ClassVar[bool] = True + + def compute_encoded_size(self, input_byte_length: int, chunk_spec: Any) -> int: + return input_byte_length + + def _decode_sync(self, chunk_data: Any, chunk_spec: Any) -> Any: + return chunk_data + + async def _decode_single(self, chunk_data: Any, chunk_spec: Any) -> Any: + return self._decode_sync(chunk_data, chunk_spec) + + def _encode_sync(self, chunk_data: Any, chunk_spec: Any) -> Any: + return chunk_data + + async def _encode_single(self, chunk_data: Any, chunk_spec: Any) -> Any: + return self._encode_sync(chunk_data, chunk_spec) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> Self: + return cls() + + def to_dict(self) -> dict[str, Any]: + return {"name": "external_identity"} + + +class TestExternalCodecCompatibility: + def test_isinstance_zarr_abc(self) -> None: + """External codec passes isinstance against zarr's ABC.""" + from zarr.abc.codec import ArrayArrayCodec + + codec = ExternalIdentityCodec() + assert isinstance(codec, ArrayArrayCodec) + + def test_isinstance_interface(self) -> None: + """External codec passes isinstance against the interface.""" + codec = ExternalIdentityCodec() + assert isinstance(codec, IArrayArrayCodec) + + def test_codecs_from_list(self) -> None: + """External codec is correctly classified by codecs_from_list.""" + from zarr.codecs.bytes import BytesCodec + from zarr.core.codec_pipeline import codecs_from_list + + aa, ab, bb = codecs_from_list([ExternalIdentityCodec(), BytesCodec()]) + assert len(aa) == 1 + assert isinstance(aa[0], IArrayArrayCodec) + assert isinstance(ab, IArrayBytesCodec) + assert len(bb) == 0 + + def test_roundtrip_through_array(self) -> None: + """External codec works in a real zarr array encode/decode cycle.""" + import zarr + from zarr.registry import register_codec + + register_codec("external_identity", ExternalIdentityCodec) + + arr = zarr.create_array( + store={}, + shape=(10,), + dtype="float64", + chunks=(10,), + 
+            filters=[ExternalIdentityCodec()],
+            compressors=None,
+            fill_value=0,
+        )
+        data = np.arange(10, dtype="float64")
+        arr[:] = data
+        np.testing.assert_array_equal(arr[:], data)
+
+
+# ---------------------------------------------------------------------------
+# External dtype defined using only zarr_interfaces
+# ---------------------------------------------------------------------------
+
+
+class TestExternalDtypeCompatibility:
+    def test_isinstance_zarr_zdtype(self) -> None:
+        """A class subclassing the interface ZDType passes isinstance against zarr's ZDType."""
+        from zarr.core.dtype.wrapper import ZDType
+
+        # Fully implemented external dtype so it can be instantiated; dtype_cls is the
+        # concrete numpy DType class that `_check_native_dtype` compares against.
+        @dataclass(frozen=True, kw_only=True, slots=True)
+        class ExternalDType(IZDType[np.dtype[np.float32], np.float32]):
+            dtype_cls: ClassVar[type] = np.dtypes.Float32DType
+            _zarr_v3_name: ClassVar[str] = "external_float32"
+
+            @classmethod
+            def from_native_dtype(cls, dtype: Any) -> Self:
+                return cls()
+
+            def to_native_dtype(self) -> np.dtype[np.float32]:
+                return np.dtype(np.float32)
+
+            @classmethod
+            def _from_json_v2(cls, data: Any) -> Self:
+                return cls()
+
+            @classmethod
+            def _from_json_v3(cls, data: Any) -> Self:
+                return cls()
+
+            def to_json(self, zarr_format: Literal[2, 3]) -> Any:
+                return "external_float32"
+
+            def _check_scalar(self, data: object) -> bool:
+                return isinstance(data, float | int | np.floating)
+
+            def cast_scalar(self, data: object) -> np.float32:
+                return np.float32(data)  # type: ignore[arg-type]
+
+            def default_scalar(self) -> np.float32:
+                return np.float32(0)
+
+            def from_json_scalar(self, data: Any, *, zarr_format: Literal[2, 3]) -> np.float32:
+                return np.float32(data)
+
+            def to_json_scalar(self, data: object, *, zarr_format: Literal[2, 3]) -> Any:
+                return float(data)  # type: ignore[arg-type]
+
+        ext = ExternalDType()
+        assert isinstance(ext, IZDType)
+        assert isinstance(ext, ZDType)