Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,27 +53,27 @@ The roadmap is intended to be a living document and may be updated as needed.

4. PR 4: AreaSizeShape module and tests

- [ ] CPU implementation, anisotropy handling, edge cases.
- [x] Implement module

5. PR 5: Colocalization module and tests
1. PR 5: Colocalization module and tests

- [ ] Metrics API, threshold options, schema and naming compliance.
- [x] Implement module

6. PR 6: Intensity module and tests
1. PR 6: Intensity module and tests

- [ ] Object-level intensity features and required helpers.
- [x] Implement module

7. PR 7: Granularity module and tests
1. PR 7: Granularity module and tests

- [ ] CPU granularity spectrum, subsampling behavior, parameter validation.
- [x] Implement module

8. PR 8: Neighbors module and tests
1. PR 8: Neighbors module and tests

- [ ] Neighbor counting APIs, distance threshold and anisotropy handling.
- [x] Implement module

9. PR 9: Texture module and tests
1. PR 9: Texture module and tests

- [ ] Haralick-style texture API, scaling helper, deterministic output ordering.
- [x] Implement module

### Phase 3: Integration, docs, release (PR 10-13)

Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ classifiers = [
dependencies = [
"fire>=0.7.1",
"jinja2>=3.1.6",
"mahotas>=1.4.18",
"pandas>=3.0.2",
"scikit-image>=0.26",
"tqdm>=4.67.3",
]
scripts.ZedProfiler = "ZedProfiler.cli:trigger"

Expand Down
153 changes: 147 additions & 6 deletions src/zedprofiler/featurization/texture.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,151 @@
"""Texture featurization module scaffold."""
"""This module generate texture features for each object in the
image using Haralick features."""
Comment on lines +1 to +2

from __future__ import annotations
import gc

from zedprofiler.exceptions import ZedProfilerError
import mahotas
import numpy
import tqdm

from zedprofiler.IO.loading_classes import ObjectLoader

def compute() -> dict[str, list[float]]:
    """Reserved public entry point for the texture suite; not yet wired up.

    Raises
    ------
    ZedProfilerError
        Always, until the implementation lands.
    """
    message = "texture.compute is not implemented yet"
    raise ZedProfilerError(message)

def scale_image(image: numpy.ndarray, num_gray_levels: int = 256) -> numpy.ndarray:
    """
    Linearly rescale an image to ``num_gray_levels`` discrete gray levels.

    The minimum pixel value maps to 0 and the maximum maps to
    ``num_gray_levels - 1``. Example: an image spanning 0..1023 rescaled
    with ``num_gray_levels=256`` spans 0..255.

    Parameters
    ----------
    image : numpy.ndarray
        The input image to be scaled. Can be an ndarray of any shape.
    num_gray_levels : int, optional
        The number of gray levels to scale the image to, by default 256.

    Returns
    -------
    numpy.ndarray
        The gray-level-scaled image with the same shape as ``image``.
        dtype is ``uint8`` for up to 256 levels (the historical behavior)
        and ``uint16`` for larger level counts, which previously wrapped
        around when cast unconditionally to uint8.
    """
    # uint8 holds at most 256 levels; wider requests need a wider dtype.
    out_dtype = numpy.uint8 if num_gray_levels <= 256 else numpy.uint16
    image_min = image.min()
    image_max = image.max()
    if image_max == image_min:
        # A flat image has no dynamic range; map everything to level 0.
        return numpy.zeros_like(image, dtype=out_dtype)
    normalized = (image - image_min) / (image_max - image_min)
    return (normalized * (num_gray_levels - 1)).astype(out_dtype)


def compute_texture(
    object_loader: ObjectLoader,
    distance: int = 1,
    grayscale: int = 256,
) -> dict:
    """
    Calculate Haralick texture features for each object in the image.

    Each labeled object is cropped to its bounding box, masked so that
    off-object voxels are zero (and excluded via ``ignore_zeros=True``),
    rescaled to ``grayscale`` gray levels, and passed to
    ``mahotas.features.haralick``. The per-direction feature vectors are
    averaged into a single value per feature, deliberately collapsing
    directionality into a compact rotation-agnostic summary.

    Parameters
    ----------
    object_loader : ObjectLoader
        The object loader containing the image and object information;
        this function reads ``image``, ``label_image`` and ``object_ids``.
    distance : int, optional
        The distance parameter for Haralick features, by default 1.
    grayscale : int, optional
        The number of gray levels to scale the image to, by default 256.

    Returns
    -------
    dict
        A dictionary with keys ``object_id``, ``texture_name`` and
        ``texture_value``. ``texture_name`` has the form
        ``"<feature>-<grayscale>-<distance>"`` where ``<feature>`` is one of:
        AngularSecondMoment, Contrast, Correlation, Variance,
        InverseDifferenceMoment, SumAverage, SumVariance, SumEntropy,
        Entropy, DifferenceVariance, DifferenceEntropy,
        InformationMeasureOfCorrelation1, InformationMeasureOfCorrelation2.
    """
    label_image = object_loader.label_image
    labels = object_loader.object_ids
    feature_names = [
        "AngularSecondMoment",
        "Contrast",
        "Correlation",
        "Variance",
        "InverseDifferenceMoment",
        "SumAverage",
        "SumVariance",
        "SumEntropy",
        "Entropy",
        "DifferenceVariance",
        "DifferenceEntropy",
        "InformationMeasureOfCorrelation1",
        "InformationMeasureOfCorrelation2",
    ]

    output_texture_dict: dict[str, list] = {
        "object_id": [],
        "texture_name": [],
        "texture_value": [],
    }
    # total= gives tqdm a length so the progress bar renders correctly.
    for label in tqdm.tqdm(labels, total=len(labels)):
        # A boolean mask avoids copying the full integer label volume
        # once per object, which is expensive on large 3D images.
        object_mask = label_image == label
        if not object_mask.any():
            # Label absent from the image (e.g. filtered out upstream).
            continue

        z_indices, y_indices, x_indices = numpy.where(object_mask)
        bbox = (
            slice(z_indices.min(), z_indices.max() + 1),
            slice(y_indices.min(), y_indices.max() + 1),
            slice(x_indices.min(), x_indices.max() + 1),
        )

        # Crop to the bounding box and zero out voxels that belong to
        # other objects or background inside that box.
        image_object = object_loader.image[bbox].copy()
        image_object[~object_mask[bbox]] = 0
        image_object = scale_image(image_object, num_gray_levels=grayscale)
        try:
            haralick_features = mahotas.features.haralick(
                ignore_zeros=True,
                f=image_object,
                distance=distance,
                compute_14th_feature=False,
            )
            # Average over directions (13 in 3D, 4 in 2D) -> one value/feature.
            feature_values = haralick_features.mean(axis=0)
        except ValueError:
            # mahotas raises for degenerate objects (e.g. a single voxel);
            # emit NaNs so the output schema stays rectangular.
            feature_values = numpy.full(len(feature_names), numpy.nan, dtype=float)
        for feature_name, feature_value in zip(feature_names, feature_values):
            output_texture_dict["object_id"].append(label)
            output_texture_dict["texture_name"].append(
                f"{feature_name}-{grayscale}-{distance}"
            )
            output_texture_dict["texture_value"].append(feature_value)
    return output_texture_dict
173 changes: 173 additions & 0 deletions tests/featurization/test_texture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
from typing import Never

import numpy as np
from _pytest.monkeypatch import MonkeyPatch

from zedprofiler.featurization import texture


class DummyObjectLoader:
    """Minimal stand-in for ``ObjectLoader``.

    Exposes only the three attributes ``compute_texture`` reads: the
    intensity ``image``, the integer ``label_image`` and the ``object_ids``
    to featurize. No validation is performed.
    """

    def __init__(
        self,
        image: np.ndarray,
        label_image: np.ndarray,
        object_ids: np.ndarray,
    ) -> None:
        # Stored verbatim; the tests only exercise attribute access.
        self.object_ids = object_ids
        self.label_image = label_image
        self.image = image


# mahotas returns 13 Haralick features here (the 14th is disabled).
FEATURE_COUNT = 13
# Non-default GLCM distance forwarded to the stubbed haralick call.
EXPECTED_DISTANCE = 2
# Label values used to build the synthetic label images below.
FIRST_OBJECT_ID = 1
SECOND_OBJECT_ID = 2
THIRD_OBJECT_ID = 3
# Number of labeled objects in the two-object schema test.
EXPECTED_OBJECT_COUNT = 2


def test_scale_image_constant_returns_zeros_uint8() -> None:
    """A constant-valued image has no dynamic range and scales to all zeros."""
    constant_image = np.full((2, 3, 4), 7, dtype=np.int16)

    scaled = texture.scale_image(constant_image, num_gray_levels=256)

    assert scaled.dtype == np.uint8
    assert scaled.shape == constant_image.shape
    assert not scaled.any()


def test_scale_image_maps_min_max_to_requested_levels() -> None:
    """The extreme input values land exactly on 0 and num_gray_levels - 1."""
    endpoints = np.array([0, 1023], dtype=np.int32)

    scaled = texture.scale_image(endpoints, num_gray_levels=256)

    expected = np.array([0, 255], dtype=np.uint8)
    np.testing.assert_array_equal(scaled, expected)


def test_compute_texture_returns_expected_schema_and_lengths(
    monkeypatch: MonkeyPatch,
) -> None:
    """Two objects x FEATURE_COUNT features yield the expected schema/lengths.

    mahotas.features.haralick is stubbed so this test pins the wiring
    (keyword contract, uint8 scaling, name formatting), not mahotas itself.
    """
    # 3x3x3 volume with two single-voxel objects at opposite corners.
    image = np.arange(3 * 3 * 3, dtype=np.uint16).reshape((3, 3, 3))
    labels = np.zeros((3, 3, 3), dtype=np.int32)
    labels[0, 0, 0] = FIRST_OBJECT_ID
    labels[2, 2, 2] = SECOND_OBJECT_ID
    loader = DummyObjectLoader(
        image=image,
        label_image=labels,
        object_ids=np.array([FIRST_OBJECT_ID, SECOND_OBJECT_ID]),
    )

    # 4 directions x FEATURE_COUNT features; the mean over axis 0 is arange.
    fake_har = np.tile(np.arange(FEATURE_COUNT, dtype=float), (4, 1))

    def fake_haralick(
        *,
        ignore_zeros: bool,
        f: np.ndarray,
        distance: int,
        compute_14th_feature: bool,
    ) -> np.ndarray:
        # Pin the exact keyword contract compute_texture uses with mahotas.
        assert ignore_zeros is True
        assert compute_14th_feature is False
        assert distance == EXPECTED_DISTANCE
        assert f.dtype == np.uint8
        return fake_har

    monkeypatch.setattr(texture.mahotas.features, "haralick", fake_haralick)

    out = texture.compute_texture(loader, distance=EXPECTED_DISTANCE, grayscale=64)

    assert set(out.keys()) == {"object_id", "texture_name", "texture_value"}
    assert len(out["object_id"]) == EXPECTED_OBJECT_COUNT * FEATURE_COUNT
    assert len(out["texture_name"]) == EXPECTED_OBJECT_COUNT * FEATURE_COUNT
    assert len(out["texture_value"]) == EXPECTED_OBJECT_COUNT * FEATURE_COUNT

    # Names embed the grayscale and distance parameters: "<feature>-64-2".
    assert all(name.endswith("-64-2") for name in out["texture_name"])
    np.testing.assert_allclose(
        out["texture_value"][:FEATURE_COUNT],
        np.arange(FEATURE_COUNT, dtype=float),
    )
    np.testing.assert_allclose(
        out["texture_value"][FEATURE_COUNT:],
        np.arange(FEATURE_COUNT, dtype=float),
    )


def test_compute_texture_valueerror_from_haralick_yields_nan_values(
    monkeypatch: MonkeyPatch,
) -> None:
    """A ValueError from haralick degrades to NaN rows instead of crashing."""
    image = np.ones((2, 2, 2), dtype=np.uint16)
    labels = np.zeros((2, 2, 2), dtype=np.int32)
    labels[0, 0, 0] = THIRD_OBJECT_ID
    loader = DummyObjectLoader(
        image=image,
        label_image=labels,
        object_ids=np.array([THIRD_OBJECT_ID]),
    )

    def exploding_haralick(**kwargs: object) -> Never:
        assert isinstance(kwargs, dict)
        raise ValueError("haralick failed")

    monkeypatch.setattr(texture.mahotas.features, "haralick", exploding_haralick)

    out = texture.compute_texture(loader)

    # One NaN row per feature, all attributed to the failing object.
    assert len(out["object_id"]) == FEATURE_COUNT
    assert out["object_id"] == [THIRD_OBJECT_ID] * FEATURE_COUNT
    values = np.asarray(out["texture_value"], dtype=float)
    assert np.isnan(values).all()


def test_compute_texture_skips_object_ids_not_present(
    monkeypatch: MonkeyPatch,
) -> None:
    """Object ids absent from the label image produce no output rows."""
    image = np.arange(8, dtype=np.uint16).reshape((2, 2, 2))
    labels = np.zeros((2, 2, 2), dtype=np.int32)
    labels[0, 0, 0] = FIRST_OBJECT_ID
    # Label 99 never appears in the label image and must be skipped silently.
    loader = DummyObjectLoader(
        image=image,
        label_image=labels,
        object_ids=np.array([FIRST_OBJECT_ID, 99]),
    )

    def stub_haralick(**kwargs: object) -> np.ndarray:
        assert isinstance(kwargs, dict)
        return np.ones((4, FEATURE_COUNT), dtype=float)

    monkeypatch.setattr(
        texture.mahotas.features,
        "haralick",
        stub_haralick,
    )

    out = texture.compute_texture(loader)

    # Only the present object contributes rows.
    assert len(out["object_id"]) == FEATURE_COUNT
    assert set(out["object_id"]) == {FIRST_OBJECT_ID}


def test_compute_texture_masks_non_object_voxels_inside_bbox(
    monkeypatch: MonkeyPatch,
) -> None:
    """Voxels inside the bbox that belong to no object must reach haralick as 0."""
    # One z-slice, 2x2: the object occupies only the diagonal of its bbox.
    image = np.array([[[5, 9], [7, 11]]], dtype=np.uint16)
    labels = np.array([[[1, 0], [0, 1]]], dtype=np.int32)
    loader = DummyObjectLoader(
        image=image,
        label_image=labels,
        object_ids=np.array([FIRST_OBJECT_ID]),
    )

    captured = {}

    def capture_haralick(*, f: np.ndarray, **kwargs: object) -> np.ndarray:
        assert isinstance(kwargs, dict)
        captured["f"] = f.copy()
        return np.zeros((4, FEATURE_COUNT), dtype=float)

    monkeypatch.setattr(texture.mahotas.features, "haralick", capture_haralick)

    texture.compute_texture(loader, grayscale=256, distance=1)

    assert "f" in captured
    # Off-object voxels in the bbox stay zero after masking and scaling.
    assert captured["f"][0, 0, 1] == 0
    assert captured["f"][0, 1, 0] == 0
Loading
Loading