1 change: 1 addition & 0 deletions .gitignore
@@ -172,3 +172,4 @@ cython_debug/

 # PyPI configuration file
 .pypirc
+data/*
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
 # ZedProfiler

-[![Coverage](https://img.shields.io/badge/coverage-87%25-green)](#quality-gates)
+[![Coverage](https://img.shields.io/badge/coverage-99%25-brightgreen)](#quality-gates)

Member:
How could we validate this coverage figure? At the moment it appears to be static, meaning it must be updated each time by some unknown force. If you have a reproducible method that updates it, consider documenting it under contributing.md. Otherwise, consider removing it altogether: part of code coverage is about building trust and reproducibility, which we directly collide with if we supply a measurement without any validation. A side effect of automating this is that it should also increase development velocity by making it impossible to skip an update when coverage changes.

 CPU-first 3D image feature extraction toolkit for high-content and high-throughput image-based profiling.
8 changes: 4 additions & 4 deletions ROADMAP.md
@@ -36,13 +36,13 @@ The roadmap is intended to be a living document and may be updated as needed.

1. PR 1: Packaging and environment baseline

- - [ ] Python package scaffold, uv dependency management, version metadata 0.0.1, lint/test tooling, CI skeleton.
- - [ ] Linux support and CPU-only scope statements in metadata and docs.
+ - [x] Python package scaffold, uv dependency management, version metadata 0.0.1, lint/test tooling, CI skeleton.
+ - [x] Linux support and CPU-only scope statements in metadata and docs.

2. PR 2: Core data model and API contracts

- - [ ] Canonical input contracts, loader interfaces, common error types.
- - [ ] Return schema contract (required keys, types, deterministic ordering).
+ - [x] Canonical input contracts, loader interfaces, common error types.
+ - [x] Return schema contract (required keys, types, deterministic ordering).

3. PR 3: RFC2119 naming specification and validators

2 changes: 2 additions & 0 deletions pyproject.toml
@@ -20,6 +20,8 @@ dependencies = [
     "fire>=0.7.1",
     "jinja2>=3.1.6",
     "pandas>=3.0.2",
+    "pyarrow>=23.0.1",

Member:
I couldn't tell exactly, but is pyarrow needed for this PR?

+    "tomli>=2.4.1",
 ]
 scripts.ZedProfiler = "ZedProfiler.cli:trigger"

250 changes: 235 additions & 15 deletions src/zedprofiler/contracts.py
@@ -1,31 +1,251 @@
 """Core data contracts shared across featurizers.

Member:
I strongly encourage greater specificity here with concern to "what kind" of contract, now that you're building this out. Specifically, design by contract, Hoare triples, and abstract data types come to mind.

-The package accepts either:
+The package accepts:
 - Single-channel 3D arrays shaped (z, y, x)
-- Multi-channel 4D arrays shaped (c, z, y, x)

Member:
What happened to 4D arrays?

 """

 from __future__ import annotations

-from dataclasses import dataclass
-from typing import Literal
+import pathlib
+from dataclasses import dataclass, field

 import numpy as np
 import numpy.typing as npt
+import tomli

-DimensionOrder = Literal["zyx", "czyx"]
+from zedprofiler.exceptions import ContractError

+EXPECTED_SPATIAL_DIMS = 3
+TWO_DIMENSIONAL = 2
+FOUR_DIMENSIONAL = 4
+FIVE_OR_MORE_DIMENSIONS = 5
+REQUIRED_RETURN_KEYS = ("image_array", "features", "metadata")
Member (on lines +17 to +21):
Consider using Pydantic to help define classes that can be used for validation broadly. You could also likely incorporate Pandera, if DataFrames are to be included somehow, which integrates nicely with Pydantic. Doing this will save you the effort of building bespoke data/contract validation tooling.

As you go through this effort, you can use concepts from object-oriented programming (OOP) to help you modulate what is being validated and worked on at an atomic level within the software. For instance, what do these attributes pertain to? Maybe it's "widget_x". You can label it as such, using defined objects that are both validated and also passed by type to other portions of the software. You might eventually find that you also want a "widget_y" with separate attributes. Keep separation of concerns in mind and, at the same time, avoid over-specific OOP ("super good enough" is often best).

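One way to read this suggestion is "validate once at construction, then pass the typed object around". A minimal stdlib sketch of that pattern is below; `ContractError` and `SingleChannelImage` are hypothetical stand-ins, and Pydantic's `BaseModel` (with `arbitrary_types_allowed` for NumPy arrays) would replace the manual `__post_init__` check:

```python
from dataclasses import dataclass

import numpy as np


class ContractError(ValueError):
    """Local stand-in for zedprofiler.exceptions.ContractError."""


@dataclass(frozen=True)
class SingleChannelImage:
    """Validate at construction, then pass the typed object around."""

    array: np.ndarray

    def __post_init__(self) -> None:
        # Reject anything that is not a (z, y, x) volume.
        if self.array.ndim != 3:
            raise ContractError(f"expected a (z, y, x) array, got ndim={self.array.ndim}")


img = SingleChannelImage(np.zeros((4, 8, 8)))
```

Downstream functions can then accept `SingleChannelImage` instead of a raw `np.ndarray` and skip re-validating the shape.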

-@dataclass(frozen=True)
-class ImageArrayContract:
-    """Document expected dimensionality and ordering for input arrays."""
-
-    dimensions: int
-    order: DimensionOrder
-
-FloatArray = npt.NDArray[np.floating]
-IntArray = npt.NDArray[np.integer]
-
-SINGLE_CHANNEL_CONTRACT = ImageArrayContract(dimensions=3, order="zyx")
-MULTI_CHANNEL_CONTRACT = ImageArrayContract(dimensions=4, order="czyx")
+def validate_image_array_shape_contracts(
+    arr: np.ndarray,
+) -> bool:
+    """
+    Validate the input array for dimensionality
+
+    Parameters
+    ----------
+    arr : np.ndarray
+        Input array to validate
+
+    Returns
+    -------
+    bool
+        The status of the validation
+
+    Raises
+    ------
+    ContractError
+        If the input array does not meet the expected contract
+    """
+    arr_shape = arr.shape
+    if len(arr_shape) == TWO_DIMENSIONAL:
+        raise ContractError(
+            f"Input array has shape {arr_shape} with {TWO_DIMENSIONAL} dimensions. "
+            f"Expected {EXPECTED_SPATIAL_DIMS} dimensions."
+        )
+    elif len(arr_shape) == FOUR_DIMENSIONAL and arr_shape[0] > 1:
+        raise ContractError(
+            f"Input array has shape {arr_shape} with {FOUR_DIMENSIONAL} dimensions, "
+            "but the first dimension (channels) has size "
+            f"{arr_shape[0]}. Expected a single-channel 3D array."
+        )
+    elif (
+        len(arr_shape) >= FIVE_OR_MORE_DIMENSIONS
+        and arr_shape[0] > 1
+        and arr_shape[1] > 1
+    ):
+        raise ContractError(
+            f"Input array has shape {arr_shape} with {len(arr_shape)} dimensions. "
+            f"Expected {EXPECTED_SPATIAL_DIMS} dimensions."
+        )
+
+    for dim_size in arr_shape:
+        if dim_size <= 0:
+            raise ContractError(
+                f"Input array has shape {arr_shape} with non-positive dimension size. "
+                "All dimensions must have size greater than 0."
+            )
+    if sum(arr_shape) == len(arr_shape):
+        raise ContractError(
+            f"Input array has shape {arr_shape} with one or more dimensions of size 1. "
+            "Expected all three dimensions to have size greater than 1."
+        )
+    return True

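The shape rules above can be exercised with a small self-contained sketch. `ContractError` and `check_shape` are local stand-ins (the zedprofiler package may not be importable here) that mirror the branches of `validate_image_array_shape_contracts`:

```python
import numpy as np


class ContractError(ValueError):
    """Local stand-in for zedprofiler.exceptions.ContractError."""


def check_shape(arr: np.ndarray) -> bool:
    """Mirror the shape contract: single-channel 3D (z, y, x) only."""
    if arr.ndim == 2:
        raise ContractError(f"got 2D shape {arr.shape}, expected 3D (z, y, x)")
    if arr.ndim == 4 and arr.shape[0] > 1:
        raise ContractError("multi-channel 4D input is not accepted")
    if any(s <= 0 for s in arr.shape):
        raise ContractError("all dimensions must have size greater than 0")
    if all(s == 1 for s in arr.shape):
        raise ContractError("all dimensions are size 1")
    return True


assert check_shape(np.zeros((5, 64, 64)))  # (z, y, x) passes
try:
    check_shape(np.zeros((64, 64)))  # 2D slices are rejected
except ContractError:
    pass
```

Note that the sketch's all-ones guard mirrors `sum(arr_shape) == len(arr_shape)`, which only fires when every dimension is 1, not when "one or more" dimensions are 1 as the error message states.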

+def validate_image_array_type_contracts(
+    arr: np.ndarray,
+) -> bool:
+    """
+    Validate the input array for type
+
+    Parameters
+    ----------
+    arr : np.ndarray
+        Input array to validate
+
+    Returns
+    -------
+    bool
+        The status of the validation
+
+    Raises
+    ------
+    ContractError
+        If the input array does not meet the expected contract
+    """
+    if not isinstance(arr, np.ndarray):
+        raise ContractError(f"Input is of type {type(arr)}, expected a numpy array.")
+    # check for numeric dtype (int or float) in the array
+    if not np.issubdtype(arr.dtype, np.number):
+        raise ContractError(
+            f"Input array has dtype {arr.dtype}, expected a numeric dtype "
+            "(int or float)."
+        )
+    return True

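The dtype gate relies on NumPy's dtype hierarchy; a quick self-contained illustration of `np.issubdtype` as used above:

```python
import numpy as np

# Integer and float dtypes both fall under np.number, so they pass the contract.
assert np.issubdtype(np.zeros(3, dtype=np.uint16).dtype, np.number)
assert np.issubdtype(np.zeros(3, dtype=np.float32).dtype, np.number)

# String and bool dtypes do not, so they would raise ContractError.
assert not np.issubdtype(np.array(["a", "b"]).dtype, np.number)
assert not np.issubdtype(np.array([True, False]).dtype, np.number)
```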

+def validate_return_schema_contract(

Member:
I'm unsure if this comes into play here exactly, but this came to mind: consider using beartype to help validate objects at time of use.

+    result: dict[str, object],
+) -> bool:
+    """Validate return schema keys, types, and deterministic key ordering."""
+    if not isinstance(result, dict):
+        raise ContractError(f"Return result must be a dict, got {type(result)}.")
+
+    actual_keys = tuple(result.keys())
+    if actual_keys != REQUIRED_RETURN_KEYS:
+        raise ContractError(
+            "Return result keys must match required deterministic order "
+            f"{REQUIRED_RETURN_KEYS}, got {actual_keys}."
+        )
+
+    if not isinstance(result["image_array"], np.ndarray):
+        raise ContractError("Return result key 'image_array' must be a numpy array.")
+    if not isinstance(result["features"], dict):
+        raise ContractError("Return result key 'features' must be a dict.")
+    if not isinstance(result["metadata"], dict):
+        raise ContractError("Return result key 'metadata' must be a dict.")
+
+    return True

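The ordering check works because Python dicts preserve insertion order; a minimal sketch of what the contract accepts and rejects:

```python
import numpy as np

REQUIRED_RETURN_KEYS = ("image_array", "features", "metadata")

# Keys inserted in the required order satisfy the deterministic-ordering check.
good = {"image_array": np.zeros((2, 4, 4)), "features": {}, "metadata": {}}
assert tuple(good.keys()) == REQUIRED_RETURN_KEYS

# The same keys inserted in a different order would violate the contract.
bad = {"features": {}, "metadata": {}, "image_array": np.zeros((2, 4, 4))}
assert tuple(bad.keys()) != REQUIRED_RETURN_KEYS
```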

+@dataclass
+class ExpectedValues:

Member:
This class seems to need a more specific name. We generally expect values of "something". What is that something for this class?

+    """Expected values for feature naming validation tests."""
+
+    config_file_path: pathlib.Path
+    compartments: list[str] = field(default_factory=list)
+    channels: list[str] = field(default_factory=list)
+    features: list[str] = field(default_factory=list)
+
+    def __post_init__(self) -> None:
+        """Load expected values from a TOML configuration file."""
+        config = tomli.loads(self.config_file_path.read_text())
+        self.compartments = list(set(config["expected_values"]["compartments"]))
+        self.channels = list(set(config["expected_values"]["channels"]))
+        # Add "NoChannel" as a valid channel for metadata columns;
+        # ZedProfiler adds it automatically regardless of the input channels.
+        self.channels.append("NoChannel")
+        self.features = [
+            "AreaSizeShape",
+            "Correlation",
+            "Granularity",
+            "Intensity",
+            "Neighbors",
+            "Texture",
+            "SAMMed3D",
+            "CHAMMI-75",
+        ]
+
+    def to_dict(self) -> dict[str, list[str]]:
+        """Return expected values as a dictionary."""
+        return {
+            "compartments": self.compartments,
+            "channels": self.channels,
+            "features": self.features,
+        }


+def validate_column_name_schema(

Member:
This seems like a great spot for Pandera, mentioned earlier.

+    column_name: str,
+    expected_values_config_path: pathlib.Path,
+) -> bool:
+    """
+    Validate the column name schema for required fields and types
+
+    Parameters
+    ----------
+    column_name : str
+        The column name to validate
+    expected_values_config_path : pathlib.Path
+        Path to the configuration file containing expected values for validation
+
+    Returns
+    -------
+    bool
+        The status of the validation
+
+    Raises
+    ------
+    ContractError
+        If the column name does not meet the expected schema
+    """
+    non_metadata_underscore_separated_parts = 4
+    metadata_underscore_separated_parts = 3
+
+    expected_values = ExpectedValues(expected_values_config_path).to_dict()
+    # check if the column name is a string
+    if not isinstance(column_name, str):
+        raise ContractError(f"Column name must be a string, got {type(column_name)}")
+
+    # check if the column name has at least 4 parts separated by underscores
+    parts = column_name.split("_")
+    if (
+        len(parts) < non_metadata_underscore_separated_parts
+        and "Metadata" not in column_name
+    ):
+        msg = (
+            "Column name must have at least "
+            f"{non_metadata_underscore_separated_parts} "
+            "parts separated by underscores, "
+            f"got {len(parts)} parts in '{column_name}'"
+        )
+        raise ContractError(msg)
+
+    if "Metadata" in column_name:
+        if len(parts) < metadata_underscore_separated_parts:
+            raise ContractError(
+                "Metadata column name must have at least "
+                f"{metadata_underscore_separated_parts} "
+                "parts separated by "
+                f"underscores, got {len(parts)} parts in '{column_name}'"
+            )
+        return True
+
+    compartment = parts[0]
+    channel = parts[1]
+    feature = parts[2]
+
+    # check if the compartment, channel, and feature are among the expected values
+    expected_compartments = expected_values.get("compartments", [])
+    expected_channels = expected_values.get("channels", [])
+    expected_features = expected_values.get("features", [])
+    msg = (
+        f"Compartment '{compartment}' is not in the expected values: "
+        f"{expected_compartments}"
+    )
+    if compartment not in expected_compartments:
+        raise ContractError(msg)
+    msg = f"Channel '{channel}' is not in the expected values: {expected_channels}"
+    if channel not in expected_channels:
+        raise ContractError(msg)
+    msg = f"Feature '{feature}' is not in expected values: {expected_features}"
+    if feature not in expected_features:
+        raise ContractError(msg)
+    return True
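The naming scheme the validator enforces can be shown with a short sketch. The compartment and channel names below are hypothetical (real expected values come from the TOML config); "Intensity" is one of the hard-coded feature names:

```python
# Non-metadata columns: Compartment_Channel_Feature_<rest>, at least 4 parts.
name = "Nuclei_DNA_Intensity_Mean"
parts = name.split("_")
assert len(parts) >= 4
compartment, channel, feature = parts[:3]
assert (compartment, channel, feature) == ("Nuclei", "DNA", "Intensity")

# Metadata columns only need 3 underscore-separated parts and skip the
# compartment/channel/feature checks.
meta = "Metadata_Well_Position"
assert "Metadata" in meta and len(meta.split("_")) >= 3
```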