Skip to content

Commit d45e42d

Browse files
committed
gh-151722: Defer GC tracking of frozendict.fromkeys() result until fully built
frozendict.fromkeys() built its result with PyIter_Next() on an already GC-tracked object, so a half-built frozendict was reachable from another thread (using the gc module) and could be observed mutating mid-construction in the free threading build. Untrack the result while it is being filled and re-track it once fully built.
1 parent 6920036 commit d45e42d

3 files changed

Lines changed: 82 additions & 0 deletions

File tree

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import gc
2+
import unittest
3+
from threading import Event, Thread
4+
5+
from test.support import threading_helper
6+
7+
8+
@threading_helper.requires_working_threading()
9+
class TestFrozenDict(unittest.TestCase):
10+
def test_racing_reads_during_fromkeys(self):
11+
# gh-151722: frozendict.fromkeys() builds its result from a generic
12+
# iterable in a fill loop; the result must not be GC-tracked until it
13+
# is fully populated, otherwise a half-built instance is observable
14+
# from other threads. Reading it with len()/repr()/hash() must not
15+
# race with the fill-time table and ma_used writes.
16+
NUM_KEYS = 5000
17+
NUM_ROUNDS = 20
18+
SENTINEL = "test_racing_reads_during_fromkeys_0"
19+
20+
empty = frozendict()
21+
latest = [empty] # main -> reader handoff, never empty
22+
done = Event()
23+
errors = []
24+
25+
def find_half_built():
26+
for obj in gc.get_objects():
27+
if (isinstance(obj, frozendict)
28+
and obj is not empty
29+
and 0 < len(obj) < NUM_KEYS
30+
and SENTINEL in obj):
31+
return obj
32+
return None
33+
34+
class EvilIter:
35+
def __iter__(self):
36+
yield SENTINEL
37+
for i in range(1, NUM_KEYS):
38+
if (i & 0x3FF) == 0 and latest[0] is empty:
39+
obj = find_half_built()
40+
if obj is not None:
41+
latest[0] = obj # leak the half-built object
42+
yield f"k{i}"
43+
44+
def reader():
45+
while not done.is_set():
46+
fd = latest[0]
47+
try:
48+
len(fd)
49+
repr(fd)
50+
hash(fd)
51+
except Exception as exc:
52+
errors.append(exc)
53+
54+
readers = [Thread(target=reader) for _ in range(5)]
55+
for t in readers:
56+
t.start()
57+
try:
58+
for _ in range(NUM_ROUNDS):
59+
latest[0] = empty
60+
frozendict.fromkeys(EvilIter(), 0)
61+
finally:
62+
done.set()
63+
for t in readers:
64+
t.join()
65+
self.assertEqual(errors, [])
66+
67+
68+
if __name__ == "__main__":
69+
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Defer GC tracking of a :class:`frozendict` created by ``frozendict.fromkeys()``
2+
until the end of construction, so a half-built instance is no longer observable
3+
from another thread in the free threading build.

Objects/dictobject.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3519,6 +3519,13 @@ dict_iter_exit:;
35193519
Py_END_CRITICAL_SECTION();
35203520
}
35213521
else if (PyFrozenDict_Check(d)) {
3522+
/* gh-151722: Keep the frozendict untracked while it is being filled,
3523+
so a half-built object is never reachable from another thread
3524+
(using the gc module). */
3525+
int was_tracked = _PyObject_GC_IS_TRACKED(d);
3526+
if (was_tracked) {
3527+
_PyObject_GC_UNTRACK(d);
3528+
}
35223529
while ((key = PyIter_Next(it)) != NULL) {
35233530
// setitem_take2_lock_held consumes a reference to key
35243531
status = setitem_take2_lock_held((PyDictObject *)d,
@@ -3528,6 +3535,9 @@ dict_iter_exit:;
35283535
goto Fail;
35293536
}
35303537
}
3538+
if (was_tracked) {
3539+
_PyObject_GC_TRACK(d);
3540+
}
35313541
}
35323542
else {
35333543
while ((key = PyIter_Next(it)) != NULL) {

0 commit comments

Comments
 (0)