Fix colorized traceback output for source lines containing non-ASCII Unicode characters

grayjk · grayjk · commit 72ffc53e4e1a · 2025-12-10T12:57:27.000-05:00
Closes #130273
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
@@ -5226,5 +5226,32 @@ def expected(t, m, fn, l, f, E, e, z):
         ]
         self.assertEqual(actual, expected(**colors))
 
+    def test_colorized_traceback_unicode(self):
+        # Checks that colorized traceback output keeps the color escapes and
+        # the caret line lined up with the source text when the failing line
+        # contains non-ASCII characters.
+        #
+        # ``colors`` comes from outside this method; it is unpacked into the
+        # keyword arguments of the local ``expected`` helpers, which only use
+        # ``E``, ``e`` and ``z`` (presumably the error-highlight, error-range
+        # and reset escape sequences -- confirm against the definition).
+
+        # Case 1: the identifier before the failing operator is made of two
+        # characters that are expected to take four caret columns in total.
+        # Do not reformat the statement below: its text is echoed in the
+        # traceback and compared verbatim, including the trailing "####".
+        try:
+            啊哈=1; 啊哈/0####
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        def expected(t, m, fn, l, f, E, e, z):
+            return [
+                # Source line: the assignment and the trailing "####" stay
+                # uncolored; left operand, operator and right operand are
+                # each wrapped in their own escape sequence.
+                f"    啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
+                # Caret line: four "~" under the two-character name, "^"
+                # under the operator, "~" under the right operand.
+                f"            {e}~~~~{z}{E}^{z}{e}~{z}",
+            ]
+        # Only output lines 2 and 3 (source line and caret line) are compared;
+        # the preceding lines are presumably the header and the "File ..."
+        # line, which are not under test here.
+        self.assertEqual(actual[2:4], expected(**colors))
+
+        # Case 2: the name below is not assigned anywhere in this method, and
+        # the expected output highlights only that name (five carets) while
+        # "/0" stays uncolored. As above, the statement text must stay as is.
+        try:
+            ééééé/0
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        def expected(t, m, fn, l, f, E, e, z):
+            return [
+                f"    {E}ééééé{z}/0",
+                f"    {E}^^^^^{z}",
+            ]
+        self.assertEqual(actual[2:4], expected(**colors))
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/traceback.py b/Lib/traceback.py
@@ -680,12 +680,12 @@ def output_line(lineno):
                         colorized_line_parts = []
                         colorized_carets_parts = []
 
-                        for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]):
+                        for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
                             caret_group = list(group)
-                            if color == "^":
+                            if "^" in color:
                                 colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset)
                                 colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset)
-                            elif color == "~":
+                            elif "~" in color:
                                 colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset)
                                 colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset)
                             else:
@@ -967,7 +967,24 @@ def setup_positions(expr, force_valid=True):
 
     return None
 
-_WIDE_CHAR_SPECIFIERS = "WF"
+
+def _lookahead(iterator, default):
+    """Return the next item of *iterator*, or *default* if none is left.
+
+    A single-copy ``itertools.tee`` of *iterator* is advanced instead of
+    *iterator* itself.  NOTE: peeking without consuming relies on *iterator*
+    already being a tee iterator and on ``tee`` handing back an independent
+    copy for it.
+    """
+    (peek,) = itertools.tee(iterator, 1)
+    return next(peek, default)
+
+
+def _zip_display_width(line, carets):
+    """Pair each displayed character of *line* with the carets beneath it.
+
+    Yield ``(text, marks)`` tuples.  *text* is one character of *line*
+    together with every character directly after it that adds no display
+    width (as measured by ``_display_width``), so a base character and all
+    of its zero-width followers stay in a single item.  *marks* is the next
+    ``_display_width(text)`` characters of *carets*, or fewer (possibly
+    ``""``) once *carets* is exhausted.
+    """
+    # _lookahead() peeks by copying a tee iterator, so wrap the input in one.
+    line = itertools.tee(line, 1)[0]
+    carets = iter(carets)
+    for char in line:
+        char_width = _display_width(char)
+        # Absorb the whole run of zero-width followers, not just the first
+        # one: a leftover follower would be yielded with an empty caret
+        # string and split the caller's grouping by caret string.
+        next_char = _lookahead(line, "")
+        while next_char and _display_width(char + next_char) == char_width:
+            next(line)
+            char += next_char
+            next_char = _lookahead(line, "")
+        yield char, "".join(itertools.islice(carets, char_width))
+
 
 def _display_width(line, offset=None):
     """Calculate the extra amount of width space the given source
@@ -981,13 +998,9 @@ def _display_width(line, offset=None):
     if line.isascii():
         return offset
 
-    import unicodedata
-
-    return sum(
-        2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
-        for char in line[:offset]
-    )
+    from _pyrepl.utils import wlen
 
+    return wlen(line[:offset])
 
 
 class _ExceptionPrintContext:
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst
@@ -0,0 +1 @@
+Fix colorized traceback output for source lines containing non-ASCII Unicode characters.

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Fix traceback color output with unicode characters`