Skip to content

Commit 7b9102c

Browse files
Merge branch 'main' into fix/pickle-reduce-args-tuple-check
2 parents e5b6b56 + 837166f commit 7b9102c

File tree

10 files changed

+286
-64
lines changed

10 files changed

+286
-64
lines changed

Doc/library/mailbox.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF.
7878
message. Failing to lock the mailbox runs the risk of losing messages or
7979
corrupting the entire mailbox.
8080

81+
The :class:`!Mailbox` class supports the :keyword:`with` statement. When used
82+
as a context manager, :class:`!Mailbox` calls :meth:`lock` when the context is entered,
83+
returns the mailbox object as the context object, and at context end calls :meth:`close`,
84+
thereby releasing the lock.
85+
86+
.. versionchanged:: next
87+
Support for the :keyword:`with` statement was added.
88+
8189
:class:`!Mailbox` instances have the following methods:
8290

8391

Lib/mailbox.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ def __init__(self, path, factory=None, create=True):
3939
self._path = os.path.abspath(os.path.expanduser(path))
4040
self._factory = factory
4141

42+
def __enter__(self):
43+
self.lock()
44+
return self
45+
46+
def __exit__(self, type, value, traceback):
47+
self.close()
48+
4249
def add(self, message):
4350
"""Add message and return assigned key."""
4451
raise NotImplementedError('Method must be implemented by subclass')

Lib/test/test_mailbox.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,11 @@ def _test_flush_or_close(self, method, should_call_close):
542542
self.assertIn(self._box.get_string(key), contents)
543543
oldbox.close()
544544

545+
def test_use_context_manager(self):
546+
# Mailboxes are usable as a context manager
547+
with self._box as box:
548+
self.assertIs(self._box, box)
549+
545550
def test_dump_message(self):
546551
# Write message representations to disk
547552
for input in (email.message_from_string(_sample_message),
@@ -1122,6 +1127,16 @@ def test_ownership_after_flush(self):
11221127
self.assertEqual(st.st_gid, other_gid)
11231128
self.assertEqual(st.st_mode, mode)
11241129

1130+
def test_context_manager_locks_and_closes(self):
1131+
# Context manager locks/unlocks and closes.
1132+
# (This test uses an implementation detail to get the state.)
1133+
self.assertFalse(self._box._locked)
1134+
with self._box as context_object:
1135+
self.assertIs(self._box, context_object)
1136+
self.assertTrue(self._box._locked)
1137+
self.assertFalse(self._box._file.closed)
1138+
self.assertFalse(self._box._locked)
1139+
self.assertTrue(self._box._file.closed)
11251140

11261141
class _TestMboxMMDF(_TestSingleFile):
11271142

Lib/test/test_ucn.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,30 @@ def test_cjk_unified_ideographs(self):
111111
self.checkletter("cjK UniFIeD idEogRAph-2aBcD", "\U0002abcd")
112112
self.checkletter("CJk uNIfiEd IDeOGraPH-2AbCd", "\U0002abcd")
113113

114+
def test_tangut_ideographs(self):
115+
self.checkletter("TANGUT IDEOGRAPH-17000", "\U00017000")
116+
self.checkletter("TANGUT IDEOGRAPH-187FF", "\U000187ff")
117+
self.checkletter("TANGUT IDEOGRAPH-18D00", "\U00018D00")
118+
self.checkletter("TANGUT IDEOGRAPH-18D1E", "\U00018d1e")
119+
self.checkletter("tangut ideograph-18d1e", "\U00018d1e")
120+
121+
def test_egyptian_hieroglyphs(self):
122+
self.checkletter("EGYPTIAN HIEROGLYPH-13460", "\U00013460")
123+
self.checkletter("EGYPTIAN HIEROGLYPH-143FA", "\U000143fa")
124+
self.checkletter("egyptian hieroglyph-143fa", "\U000143fa")
125+
126+
def test_khitan_small_script_characters(self):
127+
self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18B00", "\U00018b00")
128+
self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CD5", "\U00018cd5")
129+
self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CFF", "\U00018cff")
130+
self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CFF", "\U00018cff")
131+
self.checkletter("khitan small script character-18cff", "\U00018cff")
132+
133+
def test_nushu_characters(self):
134+
self.checkletter("NUSHU CHARACTER-1B170", "\U0001b170")
135+
self.checkletter("NUSHU CHARACTER-1B2FB", "\U0001b2fb")
136+
self.checkletter("nushu character-1b2fb", "\U0001b2fb")
137+
114138
def test_bmp_characters(self):
115139
for code in range(0x10000):
116140
char = chr(code)

Lib/test/test_unicodedata.py

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,60 @@ def test_function_checksum(self):
128128
result = h.hexdigest()
129129
self.assertEqual(result, self.expectedchecksum)
130130

131+
def test_name(self):
132+
name = self.db.name
133+
self.assertRaises(ValueError, name, '\0')
134+
self.assertRaises(ValueError, name, '\n')
135+
self.assertRaises(ValueError, name, '\x1F')
136+
self.assertRaises(ValueError, name, '\x7F')
137+
self.assertRaises(ValueError, name, '\x9F')
138+
self.assertRaises(ValueError, name, '\uFFFE')
139+
self.assertRaises(ValueError, name, '\uFFFF')
140+
self.assertRaises(ValueError, name, '\U0010FFFF')
141+
self.assertEqual(name('\U0010FFFF', 42), 42)
142+
143+
self.assertEqual(name(' '), 'SPACE')
144+
self.assertEqual(name('1'), 'DIGIT ONE')
145+
self.assertEqual(name('A'), 'LATIN CAPITAL LETTER A')
146+
self.assertEqual(name('\xA0'), 'NO-BREAK SPACE')
147+
self.assertEqual(name('\u0221', None), None if self.old else
148+
'LATIN SMALL LETTER D WITH CURL')
149+
self.assertEqual(name('\u3400'), 'CJK UNIFIED IDEOGRAPH-3400')
150+
self.assertEqual(name('\u9FA5'), 'CJK UNIFIED IDEOGRAPH-9FA5')
151+
self.assertEqual(name('\uAC00'), 'HANGUL SYLLABLE GA')
152+
self.assertEqual(name('\uD7A3'), 'HANGUL SYLLABLE HIH')
153+
self.assertEqual(name('\uF900'), 'CJK COMPATIBILITY IDEOGRAPH-F900')
154+
self.assertEqual(name('\uFA6A'), 'CJK COMPATIBILITY IDEOGRAPH-FA6A')
155+
self.assertEqual(name('\uFBF9'),
156+
'ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA '
157+
'ABOVE WITH ALEF MAKSURA ISOLATED FORM')
158+
self.assertEqual(name('\U00013460', None), None if self.old else
159+
'EGYPTIAN HIEROGLYPH-13460')
160+
self.assertEqual(name('\U000143FA', None), None if self.old else
161+
'EGYPTIAN HIEROGLYPH-143FA')
162+
self.assertEqual(name('\U00017000', None), None if self.old else
163+
'TANGUT IDEOGRAPH-17000')
164+
self.assertEqual(name('\U00018B00', None), None if self.old else
165+
'KHITAN SMALL SCRIPT CHARACTER-18B00')
166+
self.assertEqual(name('\U00018CD5', None), None if self.old else
167+
'KHITAN SMALL SCRIPT CHARACTER-18CD5')
168+
self.assertEqual(name('\U00018CFF', None), None if self.old else
169+
'KHITAN SMALL SCRIPT CHARACTER-18CFF')
170+
self.assertEqual(name('\U00018D1E', None), None if self.old else
171+
'TANGUT IDEOGRAPH-18D1E')
172+
self.assertEqual(name('\U0001B170', None), None if self.old else
173+
'NUSHU CHARACTER-1B170')
174+
self.assertEqual(name('\U0001B2FB', None), None if self.old else
175+
'NUSHU CHARACTER-1B2FB')
176+
self.assertEqual(name('\U0001FBA8', None), None if self.old else
177+
'BOX DRAWINGS LIGHT DIAGONAL UPPER CENTRE TO '
178+
'MIDDLE LEFT AND MIDDLE RIGHT TO LOWER CENTRE')
179+
self.assertEqual(name('\U0002A6D6'), 'CJK UNIFIED IDEOGRAPH-2A6D6')
180+
self.assertEqual(name('\U0002FA1D'), 'CJK COMPATIBILITY IDEOGRAPH-2FA1D')
181+
self.assertEqual(name('\U00033479', None), None if self.old else
182+
'CJK UNIFIED IDEOGRAPH-33479')
183+
184+
@requires_resource('cpu')
131185
def test_name_inverse_lookup(self):
132186
for char in iterallchars():
133187
looked_name = self.db.name(char, None)
@@ -151,6 +205,17 @@ def test_lookup_nonexistant(self):
151205
"HANDBUG",
152206
"MODIFIER LETTER CYRILLIC SMALL QUESTION MARK",
153207
"???",
208+
"CJK UNIFIED IDEOGRAPH-03400",
209+
"CJK UNIFIED IDEOGRAPH-020000",
210+
"CJK UNIFIED IDEOGRAPH-33FF",
211+
"CJK UNIFIED IDEOGRAPH-F900",
212+
"CJK UNIFIED IDEOGRAPH-13460",
213+
"CJK UNIFIED IDEOGRAPH-17000",
214+
"CJK UNIFIED IDEOGRAPH-18B00",
215+
"CJK UNIFIED IDEOGRAPH-1B170",
216+
"CJK COMPATIBILITY IDEOGRAPH-3400",
217+
"TANGUT IDEOGRAPH-3400",
218+
"HANGUL SYLLABLE AC00",
154219
]:
155220
self.assertRaises(KeyError, self.db.lookup, nonexistent)
156221

@@ -613,7 +678,47 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
613678
# (e.g. 'make distclean && make') to get the correct checksum.
614679
expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349'
615680
if quicktest else
616-
'65670ae03a324c5f9e826a4de3e25bae4d73c9b7')
681+
'180bdc91143d8aa2eb9dd6726e66d37606205942')
682+
683+
@requires_resource('network')
684+
def test_all_names(self):
685+
TESTDATAFILE = "DerivedName.txt"
686+
testdata = download_test_data_file(TESTDATAFILE)
687+
688+
with testdata:
689+
self.run_name_tests(testdata)
690+
691+
def run_name_tests(self, testdata):
692+
names_ref = {}
693+
694+
def parse_cp(s):
695+
return int(s, 16)
696+
697+
# Parse data
698+
for line in testdata:
699+
line = line.strip()
700+
if not line or line.startswith("#"):
701+
continue
702+
raw_cp, name = line.split("; ")
703+
# Check for a range
704+
if ".." in raw_cp:
705+
cp1, cp2 = map(parse_cp, raw_cp.split(".."))
706+
# remove '*' at the end
707+
assert name[-1] == '*', (raw_cp, name)
708+
name = name[:-1]
709+
for cp in range(cp1, cp2 + 1):
710+
names_ref[cp] = f"{name}{cp:04X}"
711+
elif name[-1] == '*':
712+
cp = parse_cp(raw_cp)
713+
name = name[:-1]
714+
names_ref[cp] = f"{name}{cp:04X}"
715+
else:
716+
assert '*' not in name, (raw_cp, name)
717+
cp = parse_cp(raw_cp)
718+
names_ref[cp] = name
719+
720+
for cp in range(0, sys.maxunicode + 1):
721+
self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp))
617722

618723
def test_isxidstart(self):
619724
self.assertTrue(self.db.isxidstart('S'))
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:class:`mailbox.Mailbox` instances can now be used as a context manager.
2+
The mailbox is locked when the context is entered, and is unlocked and closed when the context exits.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add support for Tangut ideograph names in :mod:`unicodedata`.

Modules/unicodedata.c

Lines changed: 63 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,22 +1052,18 @@ static const char * const hangul_syllables[][3] = {
10521052
{ 0, 0, "H" }
10531053
};
10541054

1055-
/* These ranges need to match makeunicodedata.py:cjk_ranges. */
10561055
static int
1057-
is_unified_ideograph(Py_UCS4 code)
1058-
{
1059-
return
1060-
(0x3400 <= code && code <= 0x4DBF) || /* CJK Ideograph Extension A */
1061-
(0x4E00 <= code && code <= 0x9FFF) || /* CJK Ideograph */
1062-
(0x20000 <= code && code <= 0x2A6DF) || /* CJK Ideograph Extension B */
1063-
(0x2A700 <= code && code <= 0x2B73F) || /* CJK Ideograph Extension C */
1064-
(0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
1065-
(0x2B820 <= code && code <= 0x2CEAD) || /* CJK Ideograph Extension E */
1066-
(0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
1067-
(0x2EBF0 <= code && code <= 0x2EE5D) || /* CJK Ideograph Extension I */
1068-
(0x30000 <= code && code <= 0x3134A) || /* CJK Ideograph Extension G */
1069-
(0x31350 <= code && code <= 0x323AF) || /* CJK Ideograph Extension H */
1070-
(0x323B0 <= code && code <= 0x33479); /* CJK Ideograph Extension J */
1056+
find_prefix_id(Py_UCS4 code)
1057+
{
1058+
for (int i = 0; i < (int)Py_ARRAY_LENGTH(derived_name_ranges); i++) {
1059+
if (code < derived_name_ranges[i].first) {
1060+
return -1;
1061+
}
1062+
if (code <= derived_name_ranges[i].last) {
1063+
return derived_name_ranges[i].prefixid;
1064+
}
1065+
}
1066+
return -1;
10711067
}
10721068

10731069
/* macros used to determine if the given code point is in the PUA range that
@@ -1345,7 +1341,9 @@ _getucname(PyObject *self,
13451341
}
13461342
}
13471343

1348-
if (SBase <= code && code < SBase+SCount) {
1344+
int prefixid = find_prefix_id(code);
1345+
if (prefixid == 0) {
1346+
assert(SBase <= code && code < SBase+SCount);
13491347
/* Hangul syllable. */
13501348
int SIndex = code - SBase;
13511349
int L = SIndex / NCount;
@@ -1367,11 +1365,11 @@ _getucname(PyObject *self,
13671365
return 1;
13681366
}
13691367

1370-
if (is_unified_ideograph(code)) {
1371-
if (buflen < 28)
1372-
/* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
1368+
if (prefixid > 0) {
1369+
const char *prefix = derived_name_prefixes[prefixid];
1370+
if (snprintf(buffer, buflen, "%s%04X", prefix, code) >= buflen) {
13731371
return 0;
1374-
sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code);
1372+
}
13751373
return 1;
13761374
}
13771375

@@ -1428,6 +1426,35 @@ _check_alias_and_seq(Py_UCS4* code, int with_named_seq)
14281426
return 1;
14291427
}
14301428

1429+
static Py_UCS4
1430+
parse_hex_code(const char *name, int namelen)
1431+
{
1432+
if (namelen < 4 || namelen > 6) {
1433+
return (Py_UCS4)-1;
1434+
}
1435+
if (*name == '0') {
1436+
return (Py_UCS4)-1;
1437+
}
1438+
int v = 0;
1439+
while (namelen--) {
1440+
v *= 16;
1441+
Py_UCS1 c = Py_TOUPPER(*name);
1442+
if (c >= '0' && c <= '9') {
1443+
v += c - '0';
1444+
}
1445+
else if (c >= 'A' && c <= 'F') {
1446+
v += c - 'A' + 10;
1447+
}
1448+
else {
1449+
return (Py_UCS4)-1;
1450+
}
1451+
name++;
1452+
}
1453+
if (v > 0x10ffff) {
1454+
return (Py_UCS4)-1;
1455+
}
1456+
return v;
1457+
}
14311458

14321459
static int
14331460
_getcode(const char* name, int namelen, Py_UCS4* code)
@@ -1436,8 +1463,19 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
14361463
* Named aliases are not resolved, they are returned as a code point in the
14371464
* PUA */
14381465

1439-
/* Check for hangul syllables. */
1440-
if (PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0) {
1466+
int i = 0;
1467+
size_t prefixlen;
1468+
for (; i < (int)Py_ARRAY_LENGTH(derived_name_prefixes); i++) {
1469+
const char *prefix = derived_name_prefixes[i];
1470+
prefixlen = strlen(derived_name_prefixes[i]);
1471+
if (PyOS_strnicmp(name, prefix, prefixlen) == 0) {
1472+
break;
1473+
}
1474+
}
1475+
1476+
if (i == 0) {
1477+
/* Hangul syllables. */
1478+
assert(PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0);
14411479
int len, L = -1, V = -1, T = -1;
14421480
const char *pos = name + 16;
14431481
find_syllable(pos, &len, &L, LCount, 0);
@@ -1454,28 +1492,11 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
14541492
return 0;
14551493
}
14561494

1457-
/* Check for unified ideographs. */
1458-
if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
1459-
/* Four or five hexdigits must follow. */
1460-
unsigned int v;
1461-
v = 0;
1462-
name += 22;
1463-
namelen -= 22;
1464-
if (namelen != 4 && namelen != 5)
1495+
if (i < (int)Py_ARRAY_LENGTH(derived_name_prefixes)) {
1496+
Py_UCS4 v = parse_hex_code(name + prefixlen, namelen - prefixlen);
1497+
if (find_prefix_id(v) != i) {
14651498
return 0;
1466-
while (namelen--) {
1467-
v *= 16;
1468-
Py_UCS1 c = Py_TOUPPER(*name);
1469-
if (c >= '0' && c <= '9')
1470-
v += c - '0';
1471-
else if (c >= 'A' && c <= 'F')
1472-
v += c - 'A' + 10;
1473-
else
1474-
return 0;
1475-
name++;
14761499
}
1477-
if (!is_unified_ideograph(v))
1478-
return 0;
14791500
*code = v;
14801501
return 1;
14811502
}

0 commit comments

Comments
 (0)