From f4674b91367f16eac69d5132a043bf0b188ad61a Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 15 Apr 2026 11:25:41 +0000 Subject: [PATCH 1/2] unicode_show: escape all non-ASCII in describe_char output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit describe_char used repr(c) to render suspicious characters in the description line. In Python 3, repr() only escapes characters that are not printable, so printable non-ASCII characters — letters (including Cyrillic/Greek/etc. homoglyphs), CJK, emoji, symbols, and combining marks — are passed through literally. This lets a suspicious character slip into unicode-show's own terminal output, defeating the tool's core purpose: a combining acute accent merges with the adjacent quote, a Cyrillic 'а' still reads as Latin 'a', etc. Use ascii(), which always returns an ASCII-only escaped representation, and add a regression test covering letters, homoglyphs, combining marks, CJK, emoji, and currency symbols. --- .../unicode_show/tests/unicode_show.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py index 38f0f9eb..8237697d 100644 --- a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py +++ b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py @@ -319,22 +319,10 @@ def test_printable_non_ascii_chars_are_escaped(self) -> None: """ test_cases: list[tuple[str, str, str, str, str]] = [ - ( - "\u00e9", - "'\\xe9'", - "U+00E9", - "LATIN SMALL LETTER E WITH ACUTE", - "Ll", - ), + ("é", "'\\xe9'", "U+00E9", "LATIN SMALL LETTER E WITH ACUTE", "Ll"), ("\u0430", "'\\u0430'", "U+0430", "CYRILLIC SMALL LETTER A", "Ll"), ("\u0301", "'\\u0301'", "U+0301", "COMBINING ACUTE ACCENT", "Mn"), - ( - "\u6f22", - "'\\u6f22'", - "U+6F22", - "CJK UNIFIED IDEOGRAPH-6F22", - "Lo", - ), + ("漢", "'\\u6f22'", "U+6F22", "CJK UNIFIED IDEOGRAPH-6F22", "Lo"), ("\U0001f600", "'\\U0001f600'", "U+1F600", "GRINNING FACE", "So"), ("\u20ac", "'\\u20ac'", "U+20AC", "EURO SIGN", "Sc"), ] From aab57ac5106924a55c3679b9d2772c500dd59b1d Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 27 Apr 2026 18:14:26 +0000 Subject: [PATCH 2/2] align test formatting with upstream Use \uXXXX escape sequences and multi-line tuple formatting to match the style applied during manual merge. https://claude.ai/code/session_01JiGZC3R3SjVVdNbkUnXjES --- .../unicode_show/tests/unicode_show.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py index 8237697d..38f0f9eb 100644 --- a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py +++ b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py @@ -319,10 +319,22 @@ def test_printable_non_ascii_chars_are_escaped(self) -> None: """ test_cases: list[tuple[str, str, str, str, str]] = [ - ("é", "'\\xe9'", "U+00E9", "LATIN SMALL LETTER E WITH ACUTE", "Ll"), + ( + "\u00e9", + "'\\xe9'", + "U+00E9", + "LATIN SMALL LETTER E WITH ACUTE", + "Ll", + ), ("\u0430", "'\\u0430'", "U+0430", "CYRILLIC SMALL LETTER A", "Ll"), ("\u0301", "'\\u0301'", "U+0301", "COMBINING ACUTE ACCENT", "Mn"), - ("漢", "'\\u6f22'", "U+6F22", "CJK UNIFIED IDEOGRAPH-6F22", "Lo"), + ( + "\u6f22", + "'\\u6f22'", + "U+6F22", + "CJK UNIFIED IDEOGRAPH-6F22", + "Lo", + ), ("\U0001f600", "'\\U0001f600'", "U+1F600", "GRINNING FACE", "So"), ("\u20ac", "'\\u20ac'", "U+20AC", "EURO SIGN", "Sc"), ]