Skip to content

Commit 467656e

Browse files
committed
mv wlen/str_width to traceback
1 parent e8d23cd commit 467656e

File tree

5 files changed

+83
-76
lines changed

5 files changed

+83
-76
lines changed

Lib/_pyrepl/utils.py

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22
import builtins
3-
import functools
43
import keyword
54
import re
65
import token as T
@@ -11,12 +10,12 @@
1110
from collections import deque
1211
from io import StringIO
1312
from tokenize import TokenInfo as TI
13+
from traceback import _str_width as str_width, _wlen as wlen
1414
from typing import Iterable, Iterator, Match, NamedTuple, Self
1515

1616
from .types import CharBuffer, CharWidths
1717
from .trace import trace
1818

19-
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
2019
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
2120
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
2221
IDENTIFIERS_AFTER = {"def", "class"}
@@ -59,32 +58,6 @@ class ColorSpan(NamedTuple):
5958
tag: str
6059

6160

62-
@functools.cache
63-
def str_width(c: str) -> int:
64-
if ord(c) < 128:
65-
return 1
66-
# gh-139246 for zero-width joiner and combining characters
67-
if unicodedata.combining(c):
68-
return 0
69-
category = unicodedata.category(c)
70-
if category == "Cf" and c != "\u00ad":
71-
return 0
72-
w = unicodedata.east_asian_width(c)
73-
if w in ("N", "Na", "H", "A"):
74-
return 1
75-
return 2
76-
77-
78-
def wlen(s: str) -> int:
79-
if len(s) == 1 and s != "\x1a":
80-
return str_width(s)
81-
length = sum(str_width(i) for i in s)
82-
# remove lengths of any escape sequences
83-
sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
84-
ctrl_z_cnt = s.count("\x1a")
85-
return length - sum(len(i) for i in sequence) + ctrl_z_cnt
86-
87-
8861
def unbracket(s: str, including_content: bool = False) -> str:
8962
r"""Return `s` with \001 and \002 characters removed.
9063

Lib/test/test_pyrepl/support.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
from code import InteractiveConsole
22
from functools import partial
3+
from traceback import ANSI_ESCAPE_SEQUENCE
34
from typing import Iterable
45
from unittest.mock import MagicMock
56

67
from _pyrepl.console import Console, Event
78
from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
89
from _pyrepl.simple_interact import _strip_final_indent
9-
from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE
10+
from _pyrepl.utils import unbracket
1011

1112

1213
class ScreenEqualMixin:

Lib/test/test_pyrepl/test_utils.py

Lines changed: 1 addition & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,9 @@
11
from unittest import TestCase
22

3-
from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
3+
from _pyrepl.utils import prev_next_window, gen_colors
44

55

66
class TestUtils(TestCase):
7-
def test_str_width(self):
8-
characters = [
9-
'a',
10-
'1',
11-
'_',
12-
'!',
13-
'\x1a',
14-
'\u263A',
15-
'\uffb9',
16-
'\N{LATIN SMALL LETTER E WITH ACUTE}', # é
17-
'\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
18-
'\u00ad',
19-
]
20-
for c in characters:
21-
self.assertEqual(str_width(c), 1)
22-
23-
zero_width_characters = [
24-
'\N{COMBINING ACUTE ACCENT}',
25-
'\N{ZERO WIDTH JOINER}',
26-
]
27-
for c in zero_width_characters:
28-
with self.subTest(character=c):
29-
self.assertEqual(str_width(c), 0)
30-
31-
characters = [chr(99989), chr(99999)]
32-
for c in characters:
33-
self.assertEqual(str_width(c), 2)
34-
35-
def test_wlen(self):
36-
for c in ['a', 'b', '1', '!', '_']:
37-
self.assertEqual(wlen(c), 1)
38-
self.assertEqual(wlen('\x1a'), 2)
39-
40-
char_east_asian_width_N = chr(3800)
41-
self.assertEqual(wlen(char_east_asian_width_N), 1)
42-
char_east_asian_width_W = chr(4352)
43-
self.assertEqual(wlen(char_east_asian_width_W), 2)
44-
45-
self.assertEqual(wlen('hello'), 5)
46-
self.assertEqual(wlen('hello' + '\x1a'), 7)
47-
self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
48-
self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
49-
507
def test_prev_next_window(self):
518
def gen_normal():
529
yield 1

Lib/test/test_traceback.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import traceback
2929
from functools import partial
3030
from pathlib import Path
31+
from traceback import _str_width, _wlen
3132
import _colorize
3233

3334
MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else ''
@@ -1787,6 +1788,50 @@ def f():
17871788
]
17881789
self.assertEqual(result_lines, expected)
17891790

1791+
def test_str_width(self):
    """Check ``_str_width`` for one-, zero- and two-column characters.

    Every character is checked inside its own ``subTest`` so that a
    failure names the offending character and does not hide the
    results for the characters that follow it.
    """
    characters = [
        'a',
        '1',
        '_',
        '!',
        '\x1a',
        '\u263A',
        '\uffb9',
        '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
        '\N{LATIN SMALL LETTER E WITH CEDILLA}',  # ȩ
        '\u00ad',
    ]
    for c in characters:
        with self.subTest(character=c):
            self.assertEqual(_str_width(c), 1)

    zero_width_characters = [
        '\N{COMBINING ACUTE ACCENT}',
        '\N{ZERO WIDTH JOINER}',
    ]
    for c in zero_width_characters:
        with self.subTest(character=c):
            self.assertEqual(_str_width(c), 0)

    characters = [chr(99989), chr(99999)]
    for c in characters:
        with self.subTest(character=c):
            self.assertEqual(_str_width(c), 2)
1818+
1819+
def test_wlen(self):
1820+
for c in ['a', 'b', '1', '!', '_']:
1821+
self.assertEqual(_wlen(c), 1)
1822+
self.assertEqual(_wlen('\x1a'), 2)
1823+
1824+
char_east_asian_width_N = chr(3800)
1825+
self.assertEqual(_wlen(char_east_asian_width_N), 1)
1826+
char_east_asian_width_W = chr(4352)
1827+
self.assertEqual(_wlen(char_east_asian_width_W), 2)
1828+
1829+
self.assertEqual(_wlen('hello'), 5)
1830+
self.assertEqual(_wlen('hello' + '\x1a'), 7)
1831+
self.assertEqual(_wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
1832+
self.assertEqual(_wlen('a\N{ZERO WIDTH JOINER}b'), 2)
1833+
1834+
17901835
class TestKeywordTypoSuggestions(unittest.TestCase):
17911836
TYPO_CASES = [
17921837
("with block ad something:\n pass", "and"),

Lib/traceback.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
"""Extract, format and print information about Python stack traces."""
22

33
import collections.abc
4+
import functools
45
import itertools
56
import linecache
7+
import re
68
import sys
79
import textwrap
810
import types
@@ -978,6 +980,37 @@ def _zip_display_width(line, carets):
978980
yield char, "".join(itertools.islice(carets, char_width))
979981

980982

983+
@functools.cache
def _str_width(c: str) -> int:
    """Return the display width, in columns, of the single character *c*.

    The result is:

    * 1 for any ASCII character (code point below 128);
    * 0 for combining characters and for format ("Cf") characters other
      than the soft hyphen U+00AD;
    * 1 when the East Asian width is "N", "Na", "H" or "A";
    * 2 for every other East Asian width.

    *c* must be exactly one character, because it is passed to ``ord()``.
    Results are memoized with ``functools.cache``.
    """
    # Imported on the first uncached call rather than at module import
    # time -- presumably to keep importing this module cheap.
    import unicodedata

    if ord(c) < 128:
        return 1
    # gh-139246 for zero-width joiner and combining characters
    if unicodedata.combining(c):
        return 0
    category = unicodedata.category(c)
    # The soft hyphen is category "Cf" but is still counted as one column.
    if category == "Cf" and c != "\u00ad":
        return 0
    w = unicodedata.east_asian_width(c)
    if w in ("N", "Na", "H", "A"):
        return 1
    return 2


# Matches "ESC [", any number of characters in the range " " to "@", and
# one final character in the range "A" to "~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")


def _wlen(s: str) -> int:
    """Return the display width, in columns, of the string *s*.

    Text matched by ``ANSI_ESCAPE_SEQUENCE`` contributes no width, and
    every ``"\\x1a"`` (Ctrl-Z) counts as two columns -- presumably because
    it is rendered as ``^Z``.
    """
    # Fast path: a lone character other than Ctrl-Z needs no adjustment.
    if len(s) == 1 and s != "\x1a":
        return _str_width(s)
    length = sum(_str_width(i) for i in s)
    # remove lengths of any escape sequences
    # (every character such a sequence can contain is ASCII and was counted
    # as one column above, so len() of the match is the amount to subtract)
    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
    # Each Ctrl-Z was counted once above; add one more column for each.
    ctrl_z_cnt = s.count("\x1a")
    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
1011+
1012+
1013+
9811014
def _display_width(line, offset=None):
9821015
"""Calculate the extra amount of width space the given source
9831016
code segment might take if it were to be displayed on a fixed
@@ -990,9 +1023,7 @@ def _display_width(line, offset=None):
9901023
if line.isascii():
9911024
return offset
9921025

993-
from _pyrepl.utils import wlen
994-
995-
return wlen(line[:offset])
1026+
return _wlen(line[:offset])
9961027

9971028

9981029
class _ExceptionPrintContext:

0 commit comments

Comments
 (0)