From f3b500738ef73ae28da9c24f843c7c9b582fe4b6 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 14 Sep 2025 13:13:10 +0200 Subject: [PATCH 1/5] hook up an ansi-removing function --- parse_logs.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/parse_logs.py b/parse_logs.py index 86d2221..977d83c 100644 --- a/parse_logs.py +++ b/parse_logs.py @@ -14,6 +14,10 @@ test_collection_stage = "test collection session" +def strip_ansi(msg): + pass + + @dataclass class SessionStart: pytest_version: str @@ -45,6 +49,9 @@ class PreformattedReport: variant: str | None message: str + def __post_init__(self): + self.message = strip_ansi(self.message) + @dataclass class CollectionError: From 7a2453974794f97c96b2f263ac9816f6bd716840 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 14 Sep 2025 13:41:48 +0200 Subject: [PATCH 2/5] use a regular expression to remove ansi escapes --- parse_logs.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/parse_logs.py b/parse_logs.py index 977d83c..91b2bcd 100644 --- a/parse_logs.py +++ b/parse_logs.py @@ -12,10 +12,29 @@ from pytest import CollectReport, TestReport test_collection_stage = "test collection session" +fe_bytes = "[\x40-\x5f]" +parameter_bytes = "[\x30-\x3f]" +intermediate_bytes = "[\x20-\x2f]" +final_bytes = "[\x40-\x7e]" +ansi_fe_escape_re = re.compile( + rf""" + \x1B # ESC + (?: + {fe_bytes} # single-byte Fe + | + \[ # CSI + {parameter_bytes}* + {intermediate_bytes}* + {final_bytes}* + ) + """, + re.VERBOSE, +) def strip_ansi(msg): - pass + """strip all ansi escape sequences""" + return ansi_fe_escape_re.sub("", msg) @dataclass From 6352a1edea960ccc0195264be773ee6455aa2f3d Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 14 Sep 2025 14:43:20 +0200 Subject: [PATCH 3/5] prioritize CSI over C1 --- parse_logs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/parse_logs.py b/parse_logs.py index 91b2bcd..756aa19 100644 --- a/parse_logs.py +++ 
b/parse_logs.py @@ -20,12 +20,11 @@ rf""" \x1B # ESC (?: - {fe_bytes} # single-byte Fe - | \[ # CSI {parameter_bytes}* {intermediate_bytes}* - {final_bytes}* + {final_bytes} + | {fe_bytes} # single-byte Fe ) """, re.VERBOSE, From 8688b0cb04e26c222802f8a978f11904ac6b6407 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 14 Sep 2025 19:24:48 +0200 Subject: [PATCH 4/5] check that `strip_ansi` works properly --- test_parse_log.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test_parse_log.py b/test_parse_log.py index 0c419b9..d9263a7 100644 --- a/test_parse_log.py +++ b/test_parse_log.py @@ -19,6 +19,30 @@ messages = st.text() +def ansi_csi_escapes(): + parameter_bytes = st.lists(st.characters(min_codepoint=0x30, max_codepoint=0x3F)) + intermediate_bytes = st.lists(st.characters(min_codepoint=0x20, max_codepoint=0x2F)) + final_bytes = st.characters(min_codepoint=0x40, max_codepoint=0x7E) + + return st.builds( + lambda *args: "".join(["\x1b[", *args]), + parameter_bytes.map("".join), + intermediate_bytes.map("".join), + final_bytes, + ) + + +def ansi_c1_escapes(): + byte_ = st.characters( + codec="ascii", min_codepoint=0x40, max_codepoint=0x5F, exclude_characters=["["] + ) + return st.builds(lambda b: f"\x1b{b}", byte_) + + +def ansi_fe_escapes(): + return ansi_csi_escapes() | ansi_c1_escapes() + + def preformatted_reports(): return st.tuples(filepaths, names, variants | st.none(), messages).map( lambda x: parse_logs.PreformattedReport(*x) @@ -47,3 +71,15 @@ def test_truncate(reports, max_chars): formatted = parse_logs.truncate(reports, max_chars=max_chars, py_version=py_version) assert formatted is None or len(formatted) <= max_chars + + +@given(st.lists(ansi_fe_escapes()).map("".join)) +def test_strip_ansi_multiple(escapes): + assert parse_logs.strip_ansi(escapes) == "" + + +@given(ansi_fe_escapes()) +def test_strip_ansi(escape): + message = f"some {escape}text" + + assert parse_logs.strip_ansi(message) == "some text" 
From 2151fae1377f64be713b0749a74ca1e61806436a Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 14 Sep 2025 19:48:48 +0200 Subject: [PATCH 5/5] check that pre-formatted report strips ansi escapes --- test_parse_log.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test_parse_log.py b/test_parse_log.py index d9263a7..9407b3f 100644 --- a/test_parse_log.py +++ b/test_parse_log.py @@ -83,3 +83,11 @@ def test_strip_ansi(escape): message = f"some {escape}text" assert parse_logs.strip_ansi(message) == "some text" + + +@given(ansi_fe_escapes()) +def test_preformatted_report_ansi(escape): + actual = parse_logs.PreformattedReport( + filepath="a", name="b", variant=None, message=f"{escape}text" + ) + assert actual.message == "text"