From c0cede15e6499f7e90b21eb6322c26b516ff152c Mon Sep 17 00:00:00 2001 From: MikeLippincott <1michaell2017@gmail.com> Date: Mon, 20 Apr 2026 11:52:06 -0600 Subject: [PATCH] adding the API contracts --- .gitignore | 1 + README.md | 2 +- ROADMAP.md | 8 +- pyproject.toml | 2 + src/zedprofiler/contracts.py | 250 ++++++++++++++++-- tests/conftest.py | 360 ++++++++++++++++++++++++++ tests/test_contracts.py | 265 +++++++++++++++++-- tests/test_data_profiles.py | 28 ++ tests/test_featurization_scaffolds.py | 37 +++ tests/test_package_exports.py | 2 +- tests/test_profile_fixtures.py | 133 ++++++++++ uv.lock | 84 +++++- 12 files changed, 1125 insertions(+), 47 deletions(-) create mode 100644 tests/test_data_profiles.py create mode 100644 tests/test_featurization_scaffolds.py create mode 100644 tests/test_profile_fixtures.py diff --git a/.gitignore b/.gitignore index 0a19790..7d28f77 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ cython_debug/ # PyPI configuration file .pypirc +data/* diff --git a/README.md b/README.md index 2eb2537..b8c23b2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ZedProfiler -[![Coverage](https://img.shields.io/badge/coverage-87%25-green)](#quality-gates) +[![Coverage](https://img.shields.io/badge/coverage-99%25-brightgreen)](#quality-gates) CPU-first 3D image feature extraction toolkit for high-content and high-throughput image-based profiling. diff --git a/ROADMAP.md b/ROADMAP.md index 99f0134..11e2f3e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -36,13 +36,13 @@ The roadmap is intended to be a living document and may be updated as needed. 1. PR 1: Packaging and environment baseline -- [ ] Python package scaffold, uv dependency management, version metadata 0.0.1, lint/test tooling, CI skeleton. -- [ ] Linux support and CPU-only scope statements in metadata and docs. +- [x] Python package scaffold, uv dependency management, version metadata 0.0.1, lint/test tooling, CI skeleton. +- [x] Linux support and CPU-only scope statements in metadata and docs. 2. PR 2: Core data model and API contracts -- [ ] Canonical input contracts, loader interfaces, common error types. -- [ ] Return schema contract (required keys, types, deterministic ordering). +- [x] Canonical input contracts, loader interfaces, common error types. +- [x] Return schema contract (required keys, types, deterministic ordering). 3. PR 3: RFC2119 naming specification and validators diff --git a/pyproject.toml b/pyproject.toml index 811b86e..57d053a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ dependencies = [ "fire>=0.7.1", "jinja2>=3.1.6", "pandas>=3.0.2", + "pyarrow>=23.0.1", + "tomli>=2.4.1", ] scripts.ZedProfiler = "ZedProfiler.cli:trigger" diff --git a/src/zedprofiler/contracts.py b/src/zedprofiler/contracts.py index ea4a654..4b2b192 100644 --- a/src/zedprofiler/contracts.py +++ b/src/zedprofiler/contracts.py @@ -1,31 +1,251 @@ """Core data contracts shared across featurizers. -The package accepts either: +The package accepts: - Single-channel 3D arrays shaped (z, y, x) -- Multi-channel 4D arrays shaped (c, z, y, x) """ from __future__ import annotations -from dataclasses import dataclass -from typing import Literal +import pathlib +from dataclasses import dataclass, field import numpy as np -import numpy.typing as npt +import tomli -DimensionOrder = Literal["zyx", "czyx"] +from zedprofiler.exceptions import ContractError +EXPECTED_SPATIAL_DIMS = 3 +TWO_DIMENSIONAL = 2 +FOUR_DIMENSIONAL = 4 +FIVE_OR_MORE_DIMENSIONS = 5 +REQUIRED_RETURN_KEYS = ("image_array", "features", "metadata") -@dataclass(frozen=True) -class ImageArrayContract: - """Document expected dimensionality and ordering for input arrays.""" - dimensions: int - order: DimensionOrder +def validate_image_array_shape_contracts( + arr: np.ndarray, +) -> bool: + """ + Validate the input array for dimensionality + Parameters + ---------- + arr : np.ndarray + Input array to validate -FloatArray = npt.NDArray[np.floating] -IntArray = npt.NDArray[np.integer] + Returns + ------- + bool + The status of the validation -SINGLE_CHANNEL_CONTRACT = ImageArrayContract(dimensions=3, order="zyx") -MULTI_CHANNEL_CONTRACT = ImageArrayContract(dimensions=4, order="czyx") + Raises + ------ + ContractError + If the input array does not meet the expected contract + """ + + arr_shape = arr.shape + if len(arr_shape) == TWO_DIMENSIONAL: + raise ContractError( + f"Input array has shape {arr_shape} with {TWO_DIMENSIONAL} dimensions. " + f"Expected {EXPECTED_SPATIAL_DIMS} dimensions." + ) + elif len(arr_shape) == FOUR_DIMENSIONAL and arr_shape[0] > 1: + raise ContractError( + f"Input array has shape {arr_shape} with {FOUR_DIMENSIONAL} dimensions, " + "but the first dimension (channels) has size " + f"{arr_shape[0]}. Expected a single-channel 3D array." + ) + elif ( + len(arr_shape) >= FIVE_OR_MORE_DIMENSIONS + and arr_shape[0] > 1 + and arr_shape[1] > 1 + ): + raise ContractError( + f"Input array has shape {arr_shape} with {len(arr_shape)} dimensions. " + f"Expected {EXPECTED_SPATIAL_DIMS} dimensions." + ) + + for dim_size in arr_shape: + if dim_size <= 0: + raise ContractError( + f"Input array has shape {arr_shape} with non-positive dimension size. " + "All dimensions must have size greater than 0." + ) + if sum(arr_shape) == len(arr_shape): + raise ContractError( + f"Input array has shape {arr_shape} with one or more dimensions of size 1. " + "Expected all three dimensions to have size greater than 1." + ) + return True + + +def validate_image_array_type_contracts( + arr: np.ndarray, +) -> bool: + """ + Validate the input array for type + + Parameters + ---------- + arr : np.ndarray + Input array to validate + + Returns + ------- + bool + The status of the validation + + Raises + ------ + ContractError + If the input array does not meet the expected contract + """ + if not isinstance(arr, np.ndarray): + raise ContractError(f"Input is of type {type(arr)}, expected a numpy array.") + # check for numeric dtype (int or float) in the array + if not np.issubdtype(arr.dtype, np.number): + raise ContractError( + f"Input array has dtype {arr.dtype}, expected a numeric dtype " + "(int or float)." + ) + return True + + +def validate_return_schema_contract( + result: dict[str, object], +) -> bool: + """Validate return schema keys, types, and deterministic key ordering.""" + if not isinstance(result, dict): + raise ContractError(f"Return result must be a dict, got {type(result)}.") + + actual_keys = tuple(result.keys()) + if actual_keys != REQUIRED_RETURN_KEYS: + raise ContractError( + "Return result keys must match required deterministic order " + f"{REQUIRED_RETURN_KEYS}, got {actual_keys}." + ) + + if not isinstance(result["image_array"], np.ndarray): + raise ContractError("Return result key 'image_array' must be a numpy array.") + if not isinstance(result["features"], dict): + raise ContractError("Return result key 'features' must be a dict.") + if not isinstance(result["metadata"], dict): + raise ContractError("Return result key 'metadata' must be a dict.") + + return True + + +@dataclass +class ExpectedValues: + """Expected values for feature naming validation tests.""" + + config_file_path: pathlib.Path + compartments: list[str] = field(default_factory=list) + channels: list[str] = field(default_factory=list) + features: list[str] = field(default_factory=list) + + def __post_init__(self) -> None: + """Load expected values from a TOML configuration file.""" + config = tomli.loads(self.config_file_path.read_text()) + self.compartments = list(set(config["expected_values"]["compartments"])) + self.channels = list(set(config["expected_values"]["channels"])) + # add "NoChannel" as a valid channel for metadata columns + # This is automatically added in the ZedProfiler + # regardless of input channel we want this added + # Add "NoChannel" as a valid channel for metadata columns. + self.channels.append("NoChannel") + self.features = [ + "AreaSizeShape", + "Correlation", + "Granularity", + "Intensity", + "Neighbors", + "Texture", + "SAMMed3D", + "CHAMMI-75", + ] + + def to_dict(self) -> dict[str, list[str]]: + """Return expected values as a dictionary.""" + return { + "compartments": self.compartments, + "channels": self.channels, + "features": self.features, + } + + +def validate_column_name_schema( + column_name: str, + expected_values_config_path: pathlib.Path, +) -> bool: + """ + Validate the column name schema for required fields and types + + Parameters + ---------- + column_name : str + The column name to validate + expected_values_config_path : pathlib.Path + Path to the configuration file containing expected values for validation + Returns + ------- + bool + The status of the validation + Raises + ------ + ContractError + If the column name does not meet the expected schema + """ + non_metadata_underscore_seperated_parts = 4 + metadata_underscore_seperated_parts = 3 + + expected_values = ExpectedValues(expected_values_config_path).to_dict() + # check if the column name is a string + if not isinstance(column_name, str): + raise ContractError(f"Column name must be a string, got {type(column_name)}") + + # check if the column name has at least 4 parts separated by underscores + parts = column_name.split("_") + if ( + len(parts) < non_metadata_underscore_seperated_parts + and "Metadata" not in column_name + ): + msg = ( + "Column name must have at least " + f"{non_metadata_underscore_seperated_parts} " + "parts separated by underscores, " + f"got {len(parts)} parts in '{column_name}'" + ) + raise ContractError(msg) + + if "Metadata" in column_name: + if len(parts) < metadata_underscore_seperated_parts: + raise ContractError( + "Metadata column name must have at least " + f"{metadata_underscore_seperated_parts} " + "parts separated by " + f"underscores, got {len(parts)} parts in '{column_name}'" + ) + return True + + compartment = parts[0] + channel = parts[1] + feature = parts[2] + + # check if the compartment is one of the expected values + expected_compartments = expected_values.get("compartments", []) + expected_channels = expected_values.get("channels", []) + expected_features = expected_values.get("features", []) + msg = ( + f"Compartment '{compartment}' is not in the expected values: " + f"{expected_compartments}" + ) + if compartment not in expected_compartments: + raise ContractError(msg) + msg = f"Channel '{channel}' is not in the expected values: {expected_channels}" + if channel not in expected_channels: + raise ContractError(msg) + msg = f"Feature '{feature}' is not in expected values: {expected_features}" + if feature not in expected_features: + raise ContractError(msg) + return True diff --git a/tests/conftest.py b/tests/conftest.py index 72658c5..1196dec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,9 +2,369 @@ conftest.py for pytest configuration. """ +from __future__ import annotations + +import sys +from pathlib import Path + +import numpy as np import pytest +# Add tests directory to path for imports +TEST_DIR = Path(__file__).parent +sys.path.insert(0, str(TEST_DIR)) + +# Import dataclass from test_data_profiles +from test_data_profiles import TestProfile # noqa: E402 + @pytest.fixture def my_data() -> str: return "Hello, differently!" + + +# ============================================================================ +# FIXTURE: Minimal valid profile +# ============================================================================ +@pytest.fixture +def minimal_profile() -> TestProfile: + """Minimal valid profile with required fields only.""" + return TestProfile( + image_array=np.random.rand(8, 16, 16), + features={}, + metadata={}, + ) + + +# ============================================================================ +# FIXTURE: Small 3D image profile +# ============================================================================ +@pytest.fixture +def small_image_profile() -> TestProfile: + """Profile with a small 3D image (z=4, y=8, x=8).""" + return TestProfile( + image_array=np.random.rand(4, 8, 8).astype(np.float32), + features={ + "Nuclei_DNA_Intensity_MeanIntensity": 0.512, + "Nuclei_DNA_Intensity_MaxIntensity": 0.987, + "Nuclei_DNA_Intensity_MinIntensity": 0.015, + }, + metadata={ + "Metadata_Object_ObjectID": 1, + "Metadata_Imaging_ExposureTime": 100.0, + }, + ) + + +# ============================================================================ +# FIXTURE: Medium 3D image profile +# ============================================================================ +@pytest.fixture +def medium_image_profile() -> TestProfile: + """Profile with a medium 3D image (z=16, y=32, x=32).""" + return TestProfile( + image_array=np.random.rand(16, 32, 32).astype(np.float32), + features={ + "Nuclei_DNA_Intensity_MeanIntensity": 0.528, + "Nuclei_DNA_Intensity_MaxIntensity": 0.992, + "Nuclei_DNA_Intensity_StdIntensity": 0.125, + "Nuclei_DNA_Texture_Entropy-256-3": 5.234, + "Cell_DNA_Areasizeshape_Volume": 512.0, + "Cell_DNA_Areasizeshape_SurfaceArea": 256.0, + }, + metadata={ + "Metadata_Object_ObjectID": 42, + "Metadata_Imaging_ExposureTime": 50.0, + "Metadata_Microscopy_Magnification": 60.0, + }, + ) + + +# ============================================================================ +# FIXTURE: Large 3D image profile +# ============================================================================ +@pytest.fixture +def large_image_profile() -> TestProfile: + """Profile with a larger 3D image (z=32, y=64, x=64).""" + return TestProfile( + image_array=np.random.rand(32, 64, 64).astype(np.float32), + features={ + "Nuclei_DNA_Intensity_MeanIntensity": 0.435, + "Nuclei_DNA_Intensity_MaxIntensity": 0.998, + "Nuclei_DNA_Intensity_MinIntensity": 0.001, + "Nuclei_DNA_Intensity_StdIntensity": 0.187, + "Nuclei_DNA_Intensity_MedianIntensity": 0.442, + "Nuclei_DNA_Texture_Entropy-256-3": 6.145, + "Nuclei_DNA_Texture_Gabor-3-0": 0.234, + "Cell_DNA_Areasizeshape_Volume": 2048.0, + "Cell_DNA_Areasizeshape_SurfaceArea": 1024.0, + "Cell_DNA_Areasizeshape_Sphericity": 0.856, + "Cytoplasm_DNA_Intensity_MeanIntensity": 0.312, + }, + metadata={ + "Metadata_Object_ObjectID": 123, + "Metadata_Storage_FilePath": "/data/images/cell_001.tif", + "Metadata_Imaging_ExposureTime": 25.0, + "Metadata_Microscopy_Magnification": 60.0, + "Metadata_Biology_CellType": "NPC", + "Metadata_Experiment_Treatment": "Control", + }, + ) + + +# ============================================================================ +# FIXTURE: Multi-channel intensity profile +# ============================================================================ +@pytest.fixture +def intensity_profile() -> TestProfile: + """Profile focused on intensity features from multiple channels.""" + return TestProfile( + image_array=np.random.rand(16, 48, 48).astype(np.float32), + features={ + "Nuclei_DNA_Intensity_MeanIntensity": 0.654, + "Nuclei_DNA_Intensity_MaxIntensity": 0.989, + "Nuclei_DNA_Intensity_StdIntensity": 0.156, + "Nuclei_Mito_Intensity_MeanIntensity": 0.412, + "Nuclei_Mito_Intensity_MaxIntensity": 0.956, + "Nuclei_ER_Intensity_MeanIntensity": 0.378, + "Cell_DNA_Intensity_MeanIntensity": 0.523, + "Cell_Mito_Intensity_MeanIntensity": 0.387, + "Cell_ER_Intensity_MeanIntensity": 0.445, + }, + metadata={ + "Metadata_Object_ObjectID": 456, + "Metadata_Imaging_ExposureTime": 30.0, + "Metadata_Microscopy_Magnification": 100.0, + }, + ) + + +# ============================================================================ +# FIXTURE: Texture profile +# ============================================================================ +@pytest.fixture +def texture_profile() -> TestProfile: + """Profile focused on texture features.""" + return TestProfile( + image_array=np.random.rand(12, 40, 40).astype(np.float32), + features={ + "Nuclei_DNA_Texture_Entropy-256-3": 5.678, + "Nuclei_DNA_Texture_Gabor-3-0": 0.234, + "Nuclei_DNA_Texture_Gabor-3-45": 0.198, + "Nuclei_DNA_Texture_Gabor-3-90": 0.212, + "Nuclei_DNA_Texture_Gabor-3-135": 0.205, + "Cytoplasm_Mito_Texture_Entropy-256-3": 4.892, + "Cytoplasm_Mito_Texture_Contrast-3": 0.456, + "Cell_DNA_Texture_Entropy-256-3": 6.123, + }, + metadata={ + "Metadata_Object_ObjectID": 789, + "Metadata_Imaging_ExposureTime": 50.0, + }, + ) + + +# ============================================================================ +# FIXTURE: Morphology profile +# ============================================================================ +@pytest.fixture +def morphology_profile() -> TestProfile: + """Profile focused on area/size/shape features.""" + return TestProfile( + image_array=np.random.rand(20, 56, 56).astype(np.float32), + features={ + "Nuclei_DNA_Areasizeshape_Volume": 1024.0, + "Nuclei_DNA_Areasizeshape_SurfaceArea": 512.0, + "Nuclei_DNA_Areasizeshape_Sphericity": 0.892, + "Nuclei_DNA_Areasizeshape_Solidity": 0.945, + "Nuclei_DNA_Areasizeshape_Eccentricity": 0.342, + "Nuclei_DNA_Areasizeshape_EulerCharacteristic": 1.0, + "Cell_DNA_Areasizeshape_Volume": 4096.0, + "Cell_DNA_Areasizeshape_SurfaceArea": 2048.0, + "Cell_DNA_Areasizeshape_Sphericity": 0.756, + }, + metadata={ + "Metadata_Object_ObjectID": 321, + "Metadata_Storage_FilePath": "/data/images/cell_002.tif", + "Metadata_Imaging_ExposureTime": 40.0, + }, + ) + + +# ============================================================================ +# FIXTURE: Colocalization profile +# ============================================================================ +@pytest.fixture +def colocalization_profile() -> TestProfile: + """Profile focused on colocalization features.""" + return TestProfile( + image_array=np.random.rand(14, 44, 44).astype(np.float32), + features={ + "Cell_DNA-Mito_Colocalization_Correlation": 0.623, + "Cell_DNA-Mito_Colocalization_Overlap": 0.456, + "Cell_DNA-ER_Colocalization_Correlation": 0.234, + "Cell_DNA-ER_Colocalization_Overlap": 0.178, + "Cell_Mito-ER_Colocalization_Correlation": 0.567, + "Cell_Mito-ER_Colocalization_Overlap": 0.389, + "Nuclei_DNA-Mito_Colocalization_Correlation": 0.345, + }, + metadata={ + "Metadata_Object_ObjectID": 654, + "Metadata_Imaging_ExposureTime": 60.0, + }, + ) + + +# ============================================================================ +# FIXTURE: Granularity profile +# ============================================================================ +@pytest.fixture +def granularity_profile() -> TestProfile: + """Profile focused on granularity features.""" + return TestProfile( + image_array=np.random.rand(16, 48, 48).astype(np.float32), + features={ + "Nuclei_DNA_Granularity_Spectrum-1": 0.234, + "Nuclei_DNA_Granularity_Spectrum-2": 0.256, + "Nuclei_DNA_Granularity_Spectrum-3": 0.289, + "Nuclei_DNA_Granularity_Spectrum-4": 0.312, + "Nuclei_DNA_Granularity_Spectrum-5": 0.334, + "Nuclei_DNA_Granularity_Spectrum-6": 0.345, + "Nuclei_DNA_Granularity_Spectrum-7": 0.356, + "Nuclei_DNA_Granularity_Spectrum-8": 0.362, + "Nuclei_DNA_Granularity_Spectrum-9": 0.365, + "Nuclei_DNA_Granularity_Spectrum-10": 0.367, + }, + metadata={ + "Metadata_Object_ObjectID": 987, + "Metadata_Imaging_ExposureTime": 35.0, + }, + ) + + +# ============================================================================ +# FIXTURE: Neighbors profile +# ============================================================================ +@pytest.fixture +def neighbors_profile() -> TestProfile: + """Profile focused on neighbor-based features.""" + return TestProfile( + image_array=np.random.rand(10, 32, 32).astype(np.float32), + features={ + "Nuclei_NoChannel_Neighbors_AdjacentCount": 6.0, + "Nuclei_NoChannel_Neighbors_NumberOfNeighbors": 8.0, + "Nuclei_NoChannel_Neighbors_DistanceClosestNeighbor": 15.234, + "Nuclei_NoChannel_Neighbors_PercentTouching": 0.35, + "Cell_NoChannel_Neighbors_AdjacentCount": 12.0, + "Cell_NoChannel_Neighbors_NumberOfNeighbors": 15.0, + }, + metadata={ + "Metadata_Object_ObjectID": 110, + "Metadata_Neighbors_AdjacentCount": 6, + }, + ) + + +# ============================================================================ +# FIXTURE: Complete profile with all feature types +# ============================================================================ +@pytest.fixture +def complete_profile() -> TestProfile: + """Complete profile with a mix of all feature types.""" + return TestProfile( + image_array=np.random.rand(24, 64, 64).astype(np.float32), + features={ + # Intensity + "Nuclei_DNA_Intensity_MeanIntensity": 0.543, + "Nuclei_DNA_Intensity_MaxIntensity": 0.987, + "Cell_DNA_Intensity_MeanIntensity": 0.421, + # Texture + "Nuclei_DNA_Texture_Entropy-256-3": 5.892, + "Nuclei_DNA_Texture_Gabor-3-0": 0.234, + # Morphology + "Nuclei_DNA_Areasizeshape_Volume": 1536.0, + "Nuclei_DNA_Areasizeshape_Sphericity": 0.865, + # Granularity + "Nuclei_DNA_Granularity_Spectrum-5": 0.334, + # Colocalization + "Cell_DNA-Mito_Colocalization_Correlation": 0.512, + # Neighbors + "Nuclei_NoChannel_Neighbors_AdjacentCount": 5.0, + }, + metadata={ + "Metadata_Storage_FilePath": "/data/images/sample_001.tif", + "Metadata_Object_ObjectID": 100, + "Metadata_Biology_CellType": "Neuron", + "Metadata_Imaging_ExposureTime": 45.0, + "Metadata_Microscopy_Magnification": 63.0, + "Metadata_Experiment_Treatment": "Drug_A", + "Metadata_Location_CentroidX": 156.5, + "Metadata_Location_CentroidY": 234.2, + "Metadata_Location_CentroidZ": 12.8, + }, + ) + + +# ============================================================================ +# COLLECTION FIXTURES: Groups of profiles +# ============================================================================ +@pytest.fixture +def all_profiles( + minimal_profile: TestProfile, + small_image_profile: TestProfile, + medium_image_profile: TestProfile, + large_image_profile: TestProfile, +) -> list[TestProfile]: + """Collection of profiles with increasing image sizes.""" + return [ + small_image_profile, + minimal_profile, + medium_image_profile, + large_image_profile, + ] + + +@pytest.fixture +def all_feature_type_profiles( + request: pytest.FixtureRequest, +) -> list[TestProfile]: + """Collection of profiles focused on different feature types.""" + fixture_names = [ + "intensity_profile", + "texture_profile", + "morphology_profile", + "colocalization_profile", + "granularity_profile", + "neighbors_profile", + ] + return [request.getfixturevalue(fixture_name) for fixture_name in fixture_names] + + +# ============================================================================ +# PARAMETERIZED FIXTURE DATA +# ============================================================================ +@pytest.fixture( + params=[ + (4, 8, 8), + (8, 16, 16), + (12, 24, 24), + (16, 32, 32), + (20, 40, 40), + ] +) +def varying_image_sizes(request: pytest.FixtureRequest) -> tuple[int, int, int]: + """Parameterized fixture for various 3D image dimensions.""" + return request.param + + +@pytest.fixture +def profile_with_varying_size( + varying_image_sizes: tuple[int, int, int], +) -> TestProfile: + """Profile with varying image sizes.""" + z, y, x = varying_image_sizes + return TestProfile( + image_array=np.random.rand(z, y, x).astype(np.float32), + features={"Nuclei_DNA_Intensity_MeanIntensity": 0.512}, + metadata={"Metadata_Object_ObjectID": 1}, + ) diff --git a/tests/test_contracts.py b/tests/test_contracts.py index b454645..f5637c5 100644 --- a/tests/test_contracts.py +++ b/tests/test_contracts.py @@ -1,39 +1,260 @@ -"""Tests for core data contracts.""" +"""Tests for data contract validation in zedprofiler.contracts.""" from __future__ import annotations -from dataclasses import FrozenInstanceError +from pathlib import Path import numpy as np import pytest from zedprofiler.contracts import ( - MULTI_CHANNEL_CONTRACT, - SINGLE_CHANNEL_CONTRACT, - FloatArray, - ImageArrayContract, - IntArray, + ExpectedValues, + validate_column_name_schema, + validate_image_array_shape_contracts, + validate_image_array_type_contracts, + validate_return_schema_contract, ) +from zedprofiler.exceptions import ContractError -def test_contract_constants_have_expected_values() -> None: - """Single- and multi-channel constants should match documented shapes.""" - assert ImageArrayContract(dimensions=3, order="zyx") == SINGLE_CHANNEL_CONTRACT - assert ImageArrayContract(dimensions=4, order="czyx") == MULTI_CHANNEL_CONTRACT +@pytest.fixture +def expected_values_config_path(tmp_path: Path) -> Path: + """Create a temporary expected-values TOML configuration file.""" + config_path = tmp_path / "config.toml" + config_path.write_text( + """ +[expected_values] +compartments = ["Nuclei", "Cytoplasm", "Cell", "Organoid"] +channels = ["DNA", "AGP", "ER", "Mito"] +""".strip() + ) + return config_path -def test_image_array_contract_is_frozen() -> None: - """Contracts are immutable to prevent accidental mutation in pipelines.""" - contract = ImageArrayContract(dimensions=3, order="zyx") +def test_validate_image_array_shape_contracts_accepts_valid_3d_array() -> None: + arr = np.zeros((8, 16, 16), dtype=float) - with pytest.raises(FrozenInstanceError): - contract.dimensions = 4 # type: ignore[misc] + assert validate_image_array_shape_contracts(arr) is True -def test_array_type_aliases_accept_numpy_ndarray_runtime_values() -> None: - """Type aliases should correspond to ndarray values at runtime.""" - float_values: FloatArray = np.array([1.0, 2.0, 3.0], dtype=float) - int_values: IntArray = np.array([1, 2, 3], dtype=int) +def test_validate_image_array_shape_contracts_accepts_single_channel_4d_array() -> None: + arr = np.zeros((1, 8, 16, 16), dtype=float) - assert isinstance(float_values, np.ndarray) - assert isinstance(int_values, np.ndarray) + assert validate_image_array_shape_contracts(arr) is True + + +def test_validate_image_array_shape_contracts_rejects_2d_array() -> None: + arr = np.zeros((16, 16), dtype=float) + + with pytest.raises(ContractError): + validate_image_array_shape_contracts(arr) + + +def test_validate_image_array_shape_contracts_rejects_multichannel_4d_array() -> None: + arr = np.zeros((2, 8, 16, 16), dtype=float) + + with pytest.raises(ContractError): + validate_image_array_shape_contracts(arr) + + +def test_validate_image_array_shape_contracts_rejects_multichannel_5d_array() -> None: + arr = np.zeros((2, 2, 4, 8, 8), dtype=float) + + with pytest.raises(ContractError): + validate_image_array_shape_contracts(arr) + + +def test_validate_image_array_shape_contracts_rejects_all_singleton_3d_array() -> None: + arr = np.zeros((1, 1, 1), dtype=float) + + with pytest.raises(ContractError): + validate_image_array_shape_contracts(arr) + + +def test_validate_image_array_type_contracts_accepts_numeric_array() -> None: + arr = np.zeros((8, 8, 8), dtype=np.float32) + + assert validate_image_array_type_contracts(arr) is True + + +def test_validate_image_array_type_contracts_rejects_non_numpy_array() -> None: + arr = [[1, 2], [3, 4]] + + with pytest.raises(ContractError): + validate_image_array_type_contracts(arr) # type: ignore[arg-type] + + +def test_validate_image_array_type_contracts_rejects_non_numeric_dtype() -> None: + arr = np.array([["a", "b"], ["c", "d"]], dtype=str) + + with pytest.raises(ContractError): + validate_image_array_type_contracts(arr) + + +def test_expected_values_loads_config_and_adds_nochannel( + expected_values_config_path: Path, +) -> None: + values = ExpectedValues(expected_values_config_path) + + assert "Nuclei" in values.compartments + assert "DNA" in values.channels + assert "NoChannel" in values.channels + assert "Intensity" in values.features + + +def test_expected_values_to_dict_returns_expected_keys( + expected_values_config_path: Path, +) -> None: + values = ExpectedValues(expected_values_config_path).to_dict() + + assert set(values.keys()) == {"compartments", "channels", "features"} + + +def test_validate_column_name_schema_accepts_valid_feature_column( + expected_values_config_path: Path, +) -> None: + valid_name = "Nuclei_DNA_Intensity_MeanIntensity" + + assert validate_column_name_schema(valid_name, expected_values_config_path) is True + + +def test_validate_column_name_schema_accepts_valid_metadata_column( + expected_values_config_path: Path, +) -> None: + valid_name = "Metadata_Storage_FilePath" + + assert validate_column_name_schema(valid_name, expected_values_config_path) is True + + +def test_validate_column_name_schema_rejects_non_string_column_name( + expected_values_config_path: Path, +) -> None: + with pytest.raises(ContractError): + validate_column_name_schema(123, expected_values_config_path) # type: ignore[arg-type] + + +def test_validate_column_name_schema_rejects_non_metadata_with_too_few_parts( + expected_values_config_path: Path, +) -> None: + invalid_name = "Nuclei_DNA_Intensity" + + with pytest.raises(ContractError): + validate_column_name_schema(invalid_name, expected_values_config_path) + + +def test_validate_column_name_schema_rejects_metadata_with_too_few_parts( + expected_values_config_path: Path, +) -> None: + invalid_name = "Metadata_Storage" + + with pytest.raises(ContractError): + validate_column_name_schema(invalid_name, expected_values_config_path) + + +def test_validate_column_name_schema_rejects_unknown_compartment( + expected_values_config_path: Path, +) -> None: + invalid_name = "Nucleus_DNA_Intensity_MeanIntensity" + + with pytest.raises(ContractError): + validate_column_name_schema(invalid_name, expected_values_config_path) + + +def test_validate_column_name_schema_rejects_unknown_channel( + expected_values_config_path: Path, +) -> None: + invalid_name = "Nuclei_GFP_Intensity_MeanIntensity" + + with pytest.raises(ContractError): + validate_column_name_schema(invalid_name, expected_values_config_path) + + +def test_validate_column_name_schema_rejects_unknown_feature( + expected_values_config_path: Path, +) -> None: + invalid_name = "Nuclei_DNA_UnknownFeature_MeanIntensity" + + with pytest.raises(ContractError): + validate_column_name_schema(invalid_name, expected_values_config_path) + + +def test_validate_return_schema_contract_accepts_valid_result() -> None: + result = { + "image_array": np.zeros((4, 8, 8), dtype=np.float32), + "features": {"Nuclei_DNA_Intensity_MeanIntensity": 0.5}, + "metadata": {"Metadata_Object_ObjectID": 1}, + } + + assert validate_return_schema_contract(result) is True + + +def test_validate_return_schema_contract_rejects_non_dict_result() -> None: + with pytest.raises(ContractError): + validate_return_schema_contract(["not", "a", "dict"]) # type: ignore[arg-type] + + +def test_validate_return_schema_contract_rejects_missing_required_key() -> None: + result = { + "image_array": np.zeros((4, 8, 8), dtype=np.float32), + "features": {"Nuclei_DNA_Intensity_MeanIntensity": 0.5}, + } + + with pytest.raises(ContractError): + validate_return_schema_contract(result) # type: ignore[arg-type] + + +def test_validate_return_schema_contract_rejects_extra_key() -> None: + result = { + "image_array": np.zeros((4, 8, 8), dtype=np.float32), + "features": {"Nuclei_DNA_Intensity_MeanIntensity": 0.5}, + "metadata": {"Metadata_Object_ObjectID": 1}, + "extra": 123, + } + + with pytest.raises(ContractError): + validate_return_schema_contract(result) + + +def test_validate_return_schema_contract_rejects_wrong_key_order() -> None: + # Dict insertion order is deterministic in Python 3.7+; this order is intentional. + result = { + "features": {"Nuclei_DNA_Intensity_MeanIntensity": 0.5}, + "image_array": np.zeros((4, 8, 8), dtype=np.float32), + "metadata": {"Metadata_Object_ObjectID": 1}, + } + + with pytest.raises(ContractError): + validate_return_schema_contract(result) + + +def test_validate_return_schema_contract_rejects_invalid_image_array_type() -> None: + result = { + "image_array": [[1, 2], [3, 4]], + "features": {"Nuclei_DNA_Intensity_MeanIntensity": 0.5}, + "metadata": {"Metadata_Object_ObjectID": 1}, + } + + with pytest.raises(ContractError): + validate_return_schema_contract(result) + + +def test_validate_return_schema_contract_rejects_invalid_features_type() -> None: + result = { + "image_array": np.zeros((4, 8, 8), dtype=np.float32), + "features": ["not", "a", "dict"], + "metadata": {"Metadata_Object_ObjectID": 1}, + } + + with pytest.raises(ContractError): + validate_return_schema_contract(result) + + +def test_validate_return_schema_contract_rejects_invalid_metadata_type() -> None: + result = { + "image_array": np.zeros((4, 8, 8), dtype=np.float32), + "features": {"Nuclei_DNA_Intensity_MeanIntensity": 0.5}, + "metadata": ["not", "a", "dict"], + } + + with pytest.raises(ContractError): + validate_return_schema_contract(result) diff --git a/tests/test_data_profiles.py b/tests/test_data_profiles.py new file mode 100644 index 0000000..f326e79 --- /dev/null +++ b/tests/test_data_profiles.py @@ -0,0 +1,28 @@ +"""Test data profiles utilities for ZedProfiler feature extraction. + +This module provides the TestProfile dataclass for working with test data. +All fixture definitions are in conftest.py for easy pytest discovery and reuse. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np + + +@dataclass +class TestProfile: + """A complete test profile with image data, features, and metadata.""" + + image_array: np.ndarray + features: dict + metadata: dict + + def to_dict(self) -> dict: + """Convert profile to dictionary format.""" + return { + "image_array": self.image_array, + "features": self.features, + "metadata": self.metadata, + } diff --git a/tests/test_featurization_scaffolds.py b/tests/test_featurization_scaffolds.py new file mode 100644 index 0000000..c68c4d8 --- /dev/null +++ b/tests/test_featurization_scaffolds.py @@ -0,0 +1,37 @@ +"""Tests for featurization scaffold placeholder behavior.""" + +from __future__ import annotations + +from types import ModuleType + +import pytest + +from zedprofiler.exceptions import ZedProfilerError +from zedprofiler.featurization import ( + areasizeshape, + colocalization, + granularity, + intensity, + neighbors, + texture, +) + + +@pytest.mark.parametrize( + ("module", "message"), + [ + (areasizeshape, "areasizeshape.compute is not implemented yet"), + (colocalization, "colocalization.compute is not implemented yet"), + (granularity, "granularity.compute is not implemented yet"), + (intensity, "intensity.compute is not implemented yet"), + (neighbors, "neighbors.compute is not implemented yet"), + (texture, "texture.compute is not implemented yet"), + ], +) +def test_scaffold_compute_raises_not_implemented( + module: ModuleType, + message: str, +) -> None: + """Each scaffolded compute function should raise a clear placeholder error.""" + with pytest.raises(ZedProfilerError, match=message): + module.compute() diff --git a/tests/test_package_exports.py b/tests/test_package_exports.py index 27c1f46..5035cac 100644 --- a/tests/test_package_exports.py +++ b/tests/test_package_exports.py @@ -1,4 +1,4 @@ -"""Tests for package export ergonomics.""" +r"""Tests for package export ergonomics.""" import zedprofiler from zedprofiler import colocalization diff --git a/tests/test_profile_fixtures.py b/tests/test_profile_fixtures.py new file mode 100644 index 0000000..221f55b --- /dev/null +++ b/tests/test_profile_fixtures.py @@ -0,0 +1,133 @@ +"""Test suite demonstrating the usage of test data profile fixtures. + +This module shows how to use the available test data profiles in your tests. +""" + +from __future__ import annotations + +import numpy as np +from test_data_profiles import TestProfile + +EXPECTED_IMAGE_NDIM = 3 +EXPECTED_ALL_PROFILES_COUNT = 4 +EXPECTED_FEATURE_TYPE_PROFILES_COUNT = 6 + + +class TestProfileFixtures: + """Tests demonstrating profile fixture usage.""" + + def test_minimal_profile(self, minimal_profile: TestProfile) -> None: + """Verify minimal profile has correct structure.""" + assert isinstance(minimal_profile.image_array, np.ndarray) + assert minimal_profile.image_array.ndim == EXPECTED_IMAGE_NDIM + assert len(minimal_profile.features) == 0 + assert len(minimal_profile.metadata) == 0 + + def test_small_image_profile(self, small_image_profile: TestProfile) -> None: + """Verify small profile has expected shape and data.""" + assert small_image_profile.image_array.shape == (4, 8, 8) + assert "Nuclei_DNA_Intensity_MeanIntensity" in small_image_profile.features + assert small_image_profile.metadata["Metadata_Object_ObjectID"] == 1 + + def test_medium_image_profile(self, medium_image_profile: TestProfile) -> None: + """Verify medium profile contains mixed feature types.""" + assert medium_image_profile.image_array.shape == (16, 32, 32) + # Check for multiple feature types + features = medium_image_profile.features + assert any("Intensity" in f for f in features) + assert any("Texture" in f for f in features) + assert any("Areasizeshape" in f for f in features) + + def test_complete_profile(self, complete_profile: TestProfile) -> None: + """Verify complete profile has comprehensive feature coverage.""" + assert complete_profile.image_array.shape == (24, 64, 64) + features = complete_profile.features + # Verify all feature types are present + assert any("Intensity" in f for f in features) + assert any("Texture" in f for f in features) + assert any("Areasizeshape" in f for f in features) + assert any("Colocalization" in f for f in features) + assert any("Granularity" in f for f in features) + assert any("Neighbors" in f for f in features) + + def test_intensity_profile(self, intensity_profile: TestProfile) -> None: + """Verify intensity profile focuses on intensity features.""" + features = intensity_profile.features + intensity_count = sum(1 for f in features if "Intensity" in f) + total_count = len(features) + assert intensity_count > total_count * 0.7 # Majority are intensity + + def test_texture_profile(self, texture_profile: TestProfile) -> None: + """Verify texture profile focuses on texture features.""" + features = texture_profile.features + texture_count = sum(1 for f in features if "Texture" in f) + total_count = len(features) + assert texture_count > total_count * 0.7 # Majority are texture + + def test_morphology_profile(self, morphology_profile: TestProfile) -> None: + """Verify morphology profile focuses on shape/size features.""" + features = morphology_profile.features + morph_count = sum(1 for f in features if "Areasizeshape" in f) + total_count = len(features) + assert morph_count > total_count * 0.7 # Majority are morphology + + def test_colocalization_profile(self, colocalization_profile: TestProfile) -> None: + """Verify colocalization profile has colocalization features.""" + features = colocalization_profile.features + assert all("Colocalization" in f for f in features) + + def test_granularity_profile(self, granularity_profile: TestProfile) -> None: + """Verify granularity profile has granularity spectrum features.""" + features = granularity_profile.features + assert all("Granularity" in f for f in features) + + def test_neighbors_profile(self, neighbors_profile: TestProfile) -> None: + """Verify neighbors profile has neighbor features.""" + features = neighbors_profile.features + assert all("Neighbors" in f for f in features) + + def test_all_profiles_collection(self, all_profiles: list[TestProfile]) -> None: + """Verify collection fixture contains expected profiles.""" + assert len(all_profiles) == EXPECTED_ALL_PROFILES_COUNT + assert all(isinstance(p, TestProfile) for p in all_profiles) + # Verify increasing sizes + sizes = [p.image_array.size for p in all_profiles] + assert sizes == sorted(sizes) + + def test_all_feature_type_profiles( + self, all_feature_type_profiles: list[TestProfile] + ) -> None: + """Verify feature type collection has all feature types.""" + assert len(all_feature_type_profiles) == EXPECTED_FEATURE_TYPE_PROFILES_COUNT + assert all(isinstance(p, TestProfile) for p in all_feature_type_profiles) + + +class TestProfileWithVaryingSize: + """Tests demonstrating parameterized profile fixtures.""" + + def test_varying_profile_sizes( + self, profile_with_varying_size: TestProfile + ) -> None: + """Verify profile_with_varying_size fixture produces valid profiles.""" + assert isinstance(profile_with_varying_size.image_array, np.ndarray) + assert profile_with_varying_size.image_array.ndim == EXPECTED_IMAGE_NDIM + assert ( + "Nuclei_DNA_Intensity_MeanIntensity" in profile_with_varying_size.features + ) + assert profile_with_varying_size.metadata["Metadata_Object_ObjectID"] == 1 + + +def test_profile_to_dict() -> None: + """Verify TestProfile.to_dict() method works correctly.""" + profile = TestProfile( + image_array=np.ones((4, 8, 8)), + features={"test_feature": 0.5}, + metadata={"test_meta": "value"}, + ) + + profile_dict = profile.to_dict() + assert isinstance(profile_dict, dict) + assert "image_array" in profile_dict + assert "features" in profile_dict + assert "metadata" in profile_dict + assert np.array_equal(profile_dict["image_array"], profile.image_array) diff --git a/uv.lock b/uv.lock index e5ad51b..d7f58a4 100644 --- a/uv.lock +++ b/uv.lock @@ -1377,6 +1377,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, ] +[[package]] +name = "pyarrow" +version = "23.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066 }, + { url = "https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526 }, + { url = "https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279 }, + { url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798 }, + { url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446 }, + { url = "https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972 }, + { url = "https://files.pythonhosted.org/packages/d5/09/a532297c9591a727d67760e2e756b83905dd89adb365a7f6e9c72578bcc1/pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a", size = 27540749 }, + { url = "https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544 }, + { url = "https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911 }, + { url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337 }, + { url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944 }, + { url = "https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269 }, + { url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794 }, + { url = "https://files.pythonhosted.org/packages/e9/a1/22df0620a9fac31d68397a75465c344e83c3dfe521f7612aea33e27ab6c0/pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8", size = 27660642 }, + { url = "https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755 }, + { url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826 }, + { url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859 }, + { url = "https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443 }, + { url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991 }, + { url = "https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077 }, + { url = "https://files.pythonhosted.org/packages/eb/3f/b1da7b61cd66566a4d4c8383d376c606d1c34a906c3f1cb35c479f59d1aa/pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5", size = 28234271 }, + { url = "https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692 }, + { url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383 }, + { url = "https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119 }, + { url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199 }, + { url = "https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435 }, + { url = "https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149 }, + { url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807 }, +] + [[package]] name = "pycparser" version = "2.22" @@ -1907,6 +1943,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610 }, ] +[[package]] +name = "tomli" +version = "2.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/22/de/48c59722572767841493b26183a0d1cc411d54fd759c5607c4590b6563a6/tomli-2.4.1.tar.gz", hash = "sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f", size = 17543 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/06/b823a7e818c756d9a7123ba2cda7d07bc2dd32835648d1a7b7b7a05d848d/tomli-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54", size = 155866 }, + { url = "https://files.pythonhosted.org/packages/14/6f/12645cf7f08e1a20c7eb8c297c6f11d31c1b50f316a7e7e1e1de6e2e7b7e/tomli-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a", size = 149887 }, + { url = "https://files.pythonhosted.org/packages/5c/e0/90637574e5e7212c09099c67ad349b04ec4d6020324539297b634a0192b0/tomli-2.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897", size = 243704 }, + { url = "https://files.pythonhosted.org/packages/10/8f/d3ddb16c5a4befdf31a23307f72828686ab2096f068eaf56631e136c1fdd/tomli-2.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f", size = 251628 }, + { url = "https://files.pythonhosted.org/packages/e3/f1/dbeeb9116715abee2485bf0a12d07a8f31af94d71608c171c45f64c0469d/tomli-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d", size = 247180 }, + { url = "https://files.pythonhosted.org/packages/d3/74/16336ffd19ed4da28a70959f92f506233bd7cfc2332b20bdb01591e8b1d1/tomli-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5", size = 251674 }, + { url = "https://files.pythonhosted.org/packages/16/f9/229fa3434c590ddf6c0aa9af64d3af4b752540686cace29e6281e3458469/tomli-2.4.1-cp313-cp313-win32.whl", hash = "sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd", size = 97976 }, + { url = "https://files.pythonhosted.org/packages/6a/1e/71dfd96bcc1c775420cb8befe7a9d35f2e5b1309798f009dca17b7708c1e/tomli-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36", size = 108755 }, + { url = "https://files.pythonhosted.org/packages/83/7a/d34f422a021d62420b78f5c538e5b102f62bea616d1d75a13f0a88acb04a/tomli-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd", size = 95265 }, + { url = "https://files.pythonhosted.org/packages/3c/fb/9a5c8d27dbab540869f7c1f8eb0abb3244189ce780ba9cd73f3770662072/tomli-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf", size = 155726 }, + { url = "https://files.pythonhosted.org/packages/62/05/d2f816630cc771ad836af54f5001f47a6f611d2d39535364f148b6a92d6b/tomli-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac", size = 149859 }, + { url = "https://files.pythonhosted.org/packages/ce/48/66341bdb858ad9bd0ceab5a86f90eddab127cf8b046418009f2125630ecb/tomli-2.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662", size = 244713 }, + { url = "https://files.pythonhosted.org/packages/df/6d/c5fad00d82b3c7a3ab6189bd4b10e60466f22cfe8a08a9394185c8a8111c/tomli-2.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853", size = 252084 }, + { url = "https://files.pythonhosted.org/packages/00/71/3a69e86f3eafe8c7a59d008d245888051005bd657760e96d5fbfb0b740c2/tomli-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15", size = 247973 }, + { url = "https://files.pythonhosted.org/packages/67/50/361e986652847fec4bd5e4a0208752fbe64689c603c7ae5ea7cb16b1c0ca/tomli-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba", size = 256223 }, + { url = "https://files.pythonhosted.org/packages/8c/9a/b4173689a9203472e5467217e0154b00e260621caa227b6fa01feab16998/tomli-2.4.1-cp314-cp314-win32.whl", hash = "sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6", size = 98973 }, + { url = "https://files.pythonhosted.org/packages/14/58/640ac93bf230cd27d002462c9af0d837779f8773bc03dee06b5835208214/tomli-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7", size = 109082 }, + { url = "https://files.pythonhosted.org/packages/d5/2f/702d5e05b227401c1068f0d386d79a589bb12bf64c3d2c72ce0631e3bc49/tomli-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232", size = 96490 }, + { url = "https://files.pythonhosted.org/packages/45/4b/b877b05c8ba62927d9865dd980e34a755de541eb65fffba52b4cc495d4d2/tomli-2.4.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4", size = 164263 }, + { url = "https://files.pythonhosted.org/packages/24/79/6ab420d37a270b89f7195dec5448f79400d9e9c1826df982f3f8e97b24fd/tomli-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c", size = 160736 }, + { url = "https://files.pythonhosted.org/packages/02/e0/3630057d8eb170310785723ed5adcdfb7d50cb7e6455f85ba8a3deed642b/tomli-2.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d", size = 270717 }, + { url = "https://files.pythonhosted.org/packages/7a/b4/1613716072e544d1a7891f548d8f9ec6ce2faf42ca65acae01d76ea06bb0/tomli-2.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41", size = 278461 }, + { url = "https://files.pythonhosted.org/packages/05/38/30f541baf6a3f6df77b3df16b01ba319221389e2da59427e221ef417ac0c/tomli-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c", size = 274855 }, + { url = "https://files.pythonhosted.org/packages/77/a3/ec9dd4fd2c38e98de34223b995a3b34813e6bdadf86c75314c928350ed14/tomli-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f", size = 283144 }, + { url = "https://files.pythonhosted.org/packages/ef/be/605a6261cac79fba2ec0c9827e986e00323a1945700969b8ee0b30d85453/tomli-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8", size = 108683 }, + { url = "https://files.pythonhosted.org/packages/12/64/da524626d3b9cc40c168a13da8335fe1c51be12c0a63685cc6db7308daae/tomli-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26", size = 121196 }, + { url = "https://files.pythonhosted.org/packages/5a/cd/e80b62269fc78fc36c9af5a6b89c835baa8af28ff5ad28c7028d60860320/tomli-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396", size = 100393 }, + { url = "https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583 }, +] + [[package]] name = "tornado" version = "6.5.1" @@ -2024,6 +2096,8 @@ dependencies = [ { name = "fire" }, { name = "jinja2" }, { name = "pandas" }, + { name = "pyarrow" }, + { name = "tomli" }, ] [package.dev-dependencies] @@ -2051,19 +2125,21 @@ requires-dist = [ { name = "fire", specifier = ">=0.7.1" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "pandas", specifier = ">=3.0.2" }, + { name = "pyarrow", specifier = ">=23.0.1" }, + { name = "tomli", specifier = ">=2.4.1" }, ] [package.metadata.requires-dev] dev = [ - { name = "poethepoet", specifier = ">=0.44.0" }, + { name = "poethepoet", specifier = ">=0.44" }, { name = "pytest", specifier = ">=9.0.3" }, { name = "pytest-cov", specifier = ">=5" }, { name = "ruff", specifier = ">=0.15.10" }, ] docs = [ - { name = "myst-nb", specifier = ">=1.4.0" }, - { name = "pydata-sphinx-theme", specifier = ">=0.17.0" }, - { name = "sphinx", specifier = ">=9.1.0" }, + { name = "myst-nb", specifier = ">=1.4" }, + { name = "pydata-sphinx-theme", specifier = ">=0.17" }, + { name = "sphinx", specifier = ">=9.1" }, ] notebooks = [ { name = "black", specifier = ">=26.3.1" },