From d94438d0e0ef3efde05d1a4774405269c80f0dc6 Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Thu, 12 Mar 2026 10:59:05 -0400 Subject: [PATCH] fix: handle non-alphanumeric characters in infer_pattern (#102) Fixed RuntimeError when inferring patterns from files with special characters (periods, hyphens, underscores) in their names, particularly affecting files with .ome.tif extensions. The pattern inference algorithm now properly checks if non-alphanumeric characters match before throwing an error, allowing them to be included in the inferred pattern. Changes: - Modified swSearch traceback logic in pattern.cpp to handle matching special characters - Added comprehensive regression tests in test_inferpattern_special_chars.py Fixes #102 Co-Authored-By: Claude Sonnet 4.5 --- src/filepattern/cpp/pattern.cpp | 8 ++- tests/test_inferpattern_special_chars.py | 67 ++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 tests/test_inferpattern_special_chars.py diff --git a/src/filepattern/cpp/pattern.cpp b/src/filepattern/cpp/pattern.cpp index 7ff6c2d..258e285 100644 --- a/src/filepattern/cpp/pattern.cpp +++ b/src/filepattern/cpp/pattern.cpp @@ -644,7 +644,13 @@ std::string Pattern::swSearch(std::string& pattern, std::string& filename, const patternTemplate = "@" + patternTemplate; } } else { - throw std::runtime_error("Non-numeric, non-alphabetic characters found that do not match"); + // Handle special characters (non-numeric, non-alphabetic) + // If they match, include them directly in the pattern + if(filename[col-1] == pattern[row-1] && (lastCol != col || lastRow != row)){ + patternTemplate = filename[col-1] + patternTemplate; + } else { + throw std::runtime_error("Non-numeric, non-alphabetic characters found that do not match"); + } } } else if(lastCol != col && lastRow != row){ // progrsssion was made so add a placeholder diff --git a/tests/test_inferpattern_special_chars.py b/tests/test_inferpattern_special_chars.py new file 
mode 100644
index 0000000..c9b821f
--- /dev/null
+++ b/tests/test_inferpattern_special_chars.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for pattern inference with special (non-alphanumeric) characters in filenames.
+
+Regression tests for GitHub issue #102, which caused RuntimeError when files
+contained special characters like periods, hyphens, or underscores in their names
+(e.g., .ome.tif extension). The error message was "Non-numeric, non-alphabetic
+characters found that do not match" during pattern inference.
+"""
+
+import pytest
+import filepattern as fp
+
+
+class TestInferPatternSpecialChars():
+    """Test cases for pattern inference with special characters in filenames."""
+
+    def test_ome_tif_extension(self):
+        """Test inference with .ome.tif double extension."""
+        files = [
+            "image_001.ome.tif",
+            "image_002.ome.tif",
+            "image_003.ome.tif"
+        ]
+        pattern = fp.infer_pattern(files=files)
+        # Should successfully infer pattern without throwing RuntimeError
+        assert pattern == "image_00{r:d}.ome.tif"
+
+    def test_multiple_periods(self):
+        """Test inference with multiple periods in filename."""
+        files = [
+            "exp.data.v1.001.tif",
+            "exp.data.v1.002.tif",
+            "exp.data.v1.003.tif"
+        ]
+        pattern = fp.infer_pattern(files=files)
+        assert pattern == "exp.data.v1.00{r:d}.tif"
+
+    def test_hyphens_and_underscores(self):
+        """Test inference with hyphens and underscores."""
+        files = [
+            "sample-001_exp_01.tif",
+            "sample-002_exp_02.tif",
+            "sample-003_exp_03.tif"
+        ]
+        pattern = fp.infer_pattern(files=files)
+        assert pattern == "sample-00{r:d}_exp_0{t:d}.tif"
+
+    def test_complex_special_chars(self):
+        """Test inference with multiple types of special characters."""
+        files = [
+            "Week1_22141.ome.001.tif",
+            "Week1_22141.ome.002.tif",
+            "Week1_22141.ome.003.tif"
+        ]
+        pattern = fp.infer_pattern(files=files)
+        assert pattern == "Week1_22141.ome.00{r:d}.tif"
+
+    def test_ome_tif_with_variable_naming(self):
+        """Test .ome.tif files with custom variable names."""
+        files = [
+            "cell_x01_y02_z03.ome.tif",
+            "cell_x04_y05_z06.ome.tif",
+            "cell_x07_y08_z09.ome.tif"
+        ]
+        pattern = fp.infer_pattern(files=files, variables="xyz")
+        assert pattern == "cell_x0{x:d}_y0{y:d}_z0{z:d}.ome.tif"