Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ZedProfiler

[![Coverage](https://img.shields.io/badge/coverage-87%25-green)](#quality-gates)
[![Coverage](https://img.shields.io/badge/coverage-90%25-brightgreen)](#quality-gates)

CPU-first 3D image feature extraction toolkit for high-content and high-throughput image-based profiling.

Expand Down
23 changes: 23 additions & 0 deletions src/zedprofiler/IO/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from .feature_writing_utils import (
FeatureMetadata,
format_morphology_feature_name,
remove_underscores_from_string,
save_features_as_parquet,
)
from .loading_classes import (
ImageSetConfig,
ImageSetLoader,
ObjectLoader,
TwoObjectLoader,
)

# Public API of the IO subpackage: every name imported above from
# feature_writing_utils and loading_classes is re-exported here.
__all__ = [
"FeatureMetadata",
"ImageSetConfig",
"ImageSetLoader",
"ObjectLoader",
"TwoObjectLoader",
"format_morphology_feature_name",
"remove_underscores_from_string",
"save_features_as_parquet",
]
124 changes: 124 additions & 0 deletions src/zedprofiler/IO/feature_writing_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Functions for formatting morphology feature names in a consistent way.

Formats morphology feature names and saves features as parquet files.
"""

from __future__ import annotations

import dataclasses
import pathlib

import pandas


def remove_underscores_from_string(string: str) -> str:
    """
    Replace delimiter characters in a string with hyphens.

    Underscores, periods, spaces and forward slashes are each replaced by a
    single hyphen. Input that is not already a string is converted with
    ``str()`` before the replacement.

    Parameters
    ----------
    string : str
        The value to sanitize. Non-string values are converted with ``str()``.

    Returns
    -------
    str
        The input with every ``_``, ``.``, space and ``/`` replaced by ``-``.

    Raises
    ------
    ValueError
        If the input is not a string and converting it with ``str()`` fails.
    """
    if not isinstance(string, str):
        try:
            string = str(string)
        except Exception as e:
            # Format the value with !r: a plain "{string}" would invoke the
            # failing __str__ a second time while building the message, so
            # that error would escape instead of the ValueError below.
            msg = (
                "Input string must be a string or convertible to a string. "
                f"Received input: {string!r} of type {type(string)}"
            )
            raise ValueError(msg) from e

    # One translation pass maps every delimiter character to a hyphen.
    return string.translate(
        str.maketrans(
            {
                "_": "-",
                ".": "-",
                " ": "-",
                "/": "-",
            }
        )
    )


def format_morphology_feature_name(
    compartment: str, channel: str, feature_type: str, measurement: str
) -> str:
    """
    Build a morphology feature name with a single, shared layout.

    Each component is first passed through ``remove_underscores_from_string``
    so that underscores only ever appear as the separator between components.
    The layout follows the naming specification at:
    https://github.com/WayScience/NF1_3D_organoid_profiling_pipeline/blob/main/docs/RFC-2119-Feature-Naming-Convention.md

    Parameters
    ----------
    compartment : str
        The compartment name.
    channel : str
        The channel name.
    feature_type : str
        The feature type.
    measurement : str
        The measurement name.

    Returns
    -------
    str
        The sanitized components joined as
        ``<compartment>_<channel>_<feature_type>_<measurement>``.
    """
    parts = (compartment, channel, feature_type, measurement)
    return "_".join(remove_underscores_from_string(part) for part in parts)


@dataclasses.dataclass
class FeatureMetadata:
    """Descriptive fields that identify one feature output file.

    ``save_features_as_parquet`` joins these four values with underscores to
    build the name of the parquet file it writes.

    Attributes
    ----------
    compartment : str
        The compartment name.
    channel : str
        The channel name.
    feature_type : str
        The feature type.
    cpu_or_gpu : str
        Label placed before ``_features.parquet`` in the output file name.
    """

    compartment: str
    channel: str
    feature_type: str
    cpu_or_gpu: str


def save_features_as_parquet(
    parent_path: pathlib.Path | str,
    df: pandas.DataFrame,
    metadata: FeatureMetadata,
) -> pathlib.Path:
    """Save features as a parquet file with a consistent file name.

    The file is written to
    ``<parent_path>/<compartment>_<channel>_<feature_type>_<cpu_or_gpu>_features.parquet``
    without the dataframe index. The output directory is created if it does
    not exist yet.

    Parameters
    ----------
    parent_path : pathlib.Path or str
        The directory to save the features to.
    df : pandas.DataFrame
        The dataframe containing the features to save.
    metadata : FeatureMetadata
        Metadata for the feature output (compartment, channel, feature_type,
        cpu_or_gpu); its fields are used verbatim to build the file name.

    Returns
    -------
    pathlib.Path
        The full path of the parquet file that was written.
    """
    # Coerce first so a plain string path also works with the "/" operator.
    output_dir = pathlib.Path(parent_path)
    # Make sure the target directory exists so the write below does not fail
    # on a missing parent directory.
    output_dir.mkdir(parents=True, exist_ok=True)

    file_name = (
        f"{metadata.compartment}_{metadata.channel}_{metadata.feature_type}_"
        f"{metadata.cpu_or_gpu}_features.parquet"
    )
    save_path = output_dir / file_name
    df.to_parquet(save_path, index=False)
    return save_path
Loading
Loading