From f4674b91367f16eac69d5132a043bf0b188ad61a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 15 Apr 2026 11:25:41 +0000
Subject: [PATCH 1/2] unicode_show: escape all non-ASCII in describe_char
 output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

describe_char used repr(c) to render suspicious characters in the
description line. In Python 3, repr() only escapes characters that are
not printable, so printable non-ASCII characters — letters (including
Cyrillic/Greek/etc. homoglyphs), CJK, emoji, symbols, and combining
marks — are passed through literally. This lets a suspicious character
slip into unicode-show's own terminal output, defeating the tool's core
purpose: for example, a combining acute accent (U+0301) merges with the
adjacent quote, and a Cyrillic 'а' (U+0430) still reads as Latin 'a'.

Use ascii(), which always returns an ASCII-only escaped representation,
and add a regression test covering letters, homoglyphs, combining marks,
CJK, emoji, and currency symbols.
---
 .../unicode_show/tests/unicode_show.py           | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py
index 38f0f9eb..8237697d 100644
--- a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py
+++ b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py
@@ -319,22 +319,10 @@ def test_printable_non_ascii_chars_are_escaped(self) -> None:
         """
 
         test_cases: list[tuple[str, str, str, str, str]] = [
-            (
-                "\u00e9",
-                "'\\xe9'",
-                "U+00E9",
-                "LATIN SMALL LETTER E WITH ACUTE",
-                "Ll",
-            ),
+            ("é", "'\\xe9'", "U+00E9", "LATIN SMALL LETTER E WITH ACUTE", "Ll"),
             ("\u0430", "'\\u0430'", "U+0430", "CYRILLIC SMALL LETTER A", "Ll"),
             ("\u0301", "'\\u0301'", "U+0301", "COMBINING ACUTE ACCENT", "Mn"),
-            (
-                "\u6f22",
-                "'\\u6f22'",
-                "U+6F22",
-                "CJK UNIFIED IDEOGRAPH-6F22",
-                "Lo",
-            ),
+            ("漢", "'\\u6f22'", "U+6F22", "CJK UNIFIED IDEOGRAPH-6F22", "Lo"),
             ("\U0001f600", "'\\U0001f600'", "U+1F600", "GRINNING FACE", "So"),
             ("\u20ac", "'\\u20ac'", "U+20AC", "EURO SIGN", "Sc"),
         ]

From aab57ac5106924a55c3679b9d2772c500dd59b1d Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 27 Apr 2026 18:14:26 +0000
Subject: [PATCH 2/2] align test formatting with upstream

Replace the literal non-ASCII characters in the test inputs with \uXXXX
escape sequences and use multi-line tuple formatting for those entries,
to match the style applied during the manual merge.

https://claude.ai/code/session_01JiGZC3R3SjVVdNbkUnXjES
---
 .../unicode_show/tests/unicode_show.py           | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py
index 8237697d..38f0f9eb 100644
--- a/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py
+++ b/usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py
@@ -319,10 +319,22 @@ def test_printable_non_ascii_chars_are_escaped(self) -> None:
         """
 
         test_cases: list[tuple[str, str, str, str, str]] = [
-            ("é", "'\\xe9'", "U+00E9", "LATIN SMALL LETTER E WITH ACUTE", "Ll"),
+            (
+                "\u00e9",
+                "'\\xe9'",
+                "U+00E9",
+                "LATIN SMALL LETTER E WITH ACUTE",
+                "Ll",
+            ),
             ("\u0430", "'\\u0430'", "U+0430", "CYRILLIC SMALL LETTER A", "Ll"),
             ("\u0301", "'\\u0301'", "U+0301", "COMBINING ACUTE ACCENT", "Mn"),
-            ("漢", "'\\u6f22'", "U+6F22", "CJK UNIFIED IDEOGRAPH-6F22", "Lo"),
+            (
+                "\u6f22",
+                "'\\u6f22'",
+                "U+6F22",
+                "CJK UNIFIED IDEOGRAPH-6F22",
+                "Lo",
+            ),
             ("\U0001f600", "'\\U0001f600'", "U+1F600", "GRINNING FACE", "So"),
             ("\u20ac", "'\\u20ac'", "U+20AC", "EURO SIGN", "Sc"),
         ]