Skip to content

Commit 3420e15

Browse files
committed
Address comments and refactor SetExtra implementation
1 parent e2eef9a commit 3420e15

File tree

2 files changed

+104
-64
lines changed

2 files changed

+104
-64
lines changed

Lib/test/test_free_threading/test_code.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
)
3333
GetExtra.restype = ctypes.c_int
3434

35+
# Note: each call to RequestCodeExtraIndex permanently allocates a slot
36+
# (the counter is monotonically increasing), up to MAX_CO_EXTRA_USERS (255).
3537
NTHREADS = 20
3638

3739

@@ -86,11 +88,13 @@ def worker():
8688
self.assertGreaterEqual(idx, 0)
8789

8890
for i in range(LOOP):
89-
SetExtra(code, idx, ctypes.c_void_p(i + 1))
91+
ret = SetExtra(code, idx, ctypes.c_void_p(i + 1))
92+
self.assertEqual(ret, 0)
9093

9194
for _ in range(LOOP):
9295
extra = ctypes.c_void_p()
93-
GetExtra(code, idx, extra)
96+
ret = GetExtra(code, idx, extra)
97+
self.assertEqual(ret, 0)
9498
# The slot was set by this thread, so the value must
9599
# be the last one written.
96100
self.assertEqual(extra.value, LOOP)
@@ -112,19 +116,22 @@ def f():
112116

113117
def worker():
114118
for i in range(LOOP):
115-
SetExtra(code, idx, ctypes.c_void_p(i + 1))
119+
ret = SetExtra(code, idx, ctypes.c_void_p(i + 1))
120+
self.assertEqual(ret, 0)
116121

117122
for _ in range(LOOP):
118123
extra = ctypes.c_void_p()
119-
GetExtra(code, idx, extra)
124+
ret = GetExtra(code, idx, extra)
125+
self.assertEqual(ret, 0)
120126
# Value is set by any writer thread.
121127
self.assertTrue(1 <= extra.value <= LOOP)
122128

123129
run_concurrently(worker_func=worker, nthreads=NTHREADS)
124130

125131
# Every thread's last write is LOOP, so the final value must be LOOP.
126132
extra = ctypes.c_void_p()
127-
GetExtra(code, idx, extra)
133+
ret = GetExtra(code, idx, extra)
134+
self.assertEqual(ret, 0)
128135
self.assertEqual(extra.value, LOOP)
129136

130137

Objects/codeobject.c

Lines changed: 92 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1581,6 +1581,61 @@ code_extra_size(Py_ssize_t n)
15811581
return sizeof(_PyCodeObjectExtra) + (n - 1) * sizeof(void *);
15821582
}
15831583

1584+
#ifdef Py_GIL_DISABLED
1585+
static int
1586+
code_extra_grow_ft(PyCodeObject *co, _PyCodeObjectExtra *old_co_extra,
1587+
Py_ssize_t old_ce_size, Py_ssize_t new_ce_size,
1588+
Py_ssize_t index, void *extra)
1589+
{
1590+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(co);
1591+
_PyCodeObjectExtra *new_co_extra = PyMem_Malloc(
1592+
code_extra_size(new_ce_size));
1593+
if (new_co_extra == NULL) {
1594+
PyErr_NoMemory();
1595+
return -1;
1596+
}
1597+
1598+
if (old_ce_size > 0) {
1599+
memcpy(new_co_extra->ce_extras, old_co_extra->ce_extras,
1600+
old_ce_size * sizeof(void *));
1601+
}
1602+
for (Py_ssize_t i = old_ce_size; i < new_ce_size; i++) {
1603+
new_co_extra->ce_extras[i] = NULL;
1604+
}
1605+
new_co_extra->ce_size = new_ce_size;
1606+
new_co_extra->ce_extras[index] = extra;
1607+
1608+
// Publish new buffer and its contents to lock-free readers.
1609+
FT_ATOMIC_STORE_PTR_RELEASE(co->co_extra, new_co_extra);
1610+
if (old_co_extra != NULL) {
1611+
// QSBR: defer old-buffer free until lock-free readers quiesce.
1612+
_PyMem_FreeDelayed(old_co_extra, code_extra_size(old_ce_size));
1613+
}
1614+
return 0;
1615+
}
1616+
#else
1617+
static int
1618+
code_extra_grow_gil(PyCodeObject *co, _PyCodeObjectExtra *old_co_extra,
1619+
Py_ssize_t old_ce_size, Py_ssize_t new_ce_size,
1620+
Py_ssize_t index, void *extra)
1621+
{
1622+
_PyCodeObjectExtra *new_co_extra = PyMem_Realloc(
1623+
old_co_extra, code_extra_size(new_ce_size));
1624+
if (new_co_extra == NULL) {
1625+
PyErr_NoMemory();
1626+
return -1;
1627+
}
1628+
1629+
for (Py_ssize_t i = old_ce_size; i < new_ce_size; i++) {
1630+
new_co_extra->ce_extras[i] = NULL;
1631+
}
1632+
new_co_extra->ce_size = new_ce_size;
1633+
new_co_extra->ce_extras[index] = extra;
1634+
co->co_extra = new_co_extra;
1635+
return 0;
1636+
}
1637+
#endif
1638+
15841639
int
15851640
PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra)
15861641
{
@@ -1589,17 +1644,17 @@ PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra)
15891644
return -1;
15901645
}
15911646

1592-
PyCodeObject *o = (PyCodeObject *)code;
1647+
PyCodeObject *co = (PyCodeObject *)code;
15931648
*extra = NULL;
15941649

15951650
if (index < 0) {
15961651
return 0;
15971652
}
15981653

1599-
// Lock-free read; pairs with release store in SetExtra.
1600-
_PyCodeObjectExtra *co_extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(o->co_extra);
1654+
// Lock-free read; pairs with release stores in SetExtra.
1655+
_PyCodeObjectExtra *co_extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(co->co_extra);
16011656
if (co_extra != NULL && index < co_extra->ce_size) {
1602-
*extra = co_extra->ce_extras[index];
1657+
*extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(co_extra->ce_extras[index]);
16031658
}
16041659

16051660
return 0;
@@ -1611,8 +1666,9 @@ PyUnstable_Code_SetExtra(PyObject *code, Py_ssize_t index, void *extra)
16111666
{
16121667
PyInterpreterState *interp = _PyInterpreterState_GET();
16131668

1614-
// co_extra_user_count increases monotonically and is published with a
1615-
// release store, so once an index is valid it remains valid.
1669+
// co_extra_user_count is monotonically increasing and published with a
1670+
// release store in RequestCodeExtraIndex, so once an index is valid
1671+
// it stays valid.
16161672
Py_ssize_t user_count = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(
16171673
interp->co_extra_user_count);
16181674

@@ -1621,71 +1677,48 @@ PyUnstable_Code_SetExtra(PyObject *code, Py_ssize_t index, void *extra)
16211677
return -1;
16221678
}
16231679

1624-
PyCodeObject *o = (PyCodeObject *) code;
1625-
int res = 0;
1626-
1627-
Py_BEGIN_CRITICAL_SECTION(o);
1680+
PyCodeObject *co = (PyCodeObject *)code;
1681+
int result = 0;
1682+
void *old_slot_value = NULL;
16281683

1629-
_PyCodeObjectExtra *old_extra = (_PyCodeObjectExtra *) o->co_extra;
1630-
Py_ssize_t old_size = (old_extra == NULL) ? 0 : old_extra->ce_size;
1684+
Py_BEGIN_CRITICAL_SECTION(co);
16311685

1632-
// user_count > index is checked above.
1633-
Py_ssize_t new_size = old_size > index ? old_size : user_count;
1634-
assert(new_size > 0 && new_size > index);
1686+
_PyCodeObjectExtra *old_co_extra = (_PyCodeObjectExtra *)co->co_extra;
1687+
Py_ssize_t old_ce_size = (old_co_extra == NULL)
1688+
? 0 : old_co_extra->ce_size;
16351689

1636-
// Free-threaded builds require copy-on-write to avoid mutating
1637-
// co_extra while lock-free readers in GetExtra may be traversing it.
1638-
// GIL builds could realloc in place, but SetExtra is called rarely
1639-
// and co_extra is small, so use the same path for simplicity.
1640-
_PyCodeObjectExtra *co_extra = PyMem_Malloc(code_extra_size(new_size));
1641-
if (co_extra == NULL) {
1642-
PyErr_NoMemory();
1643-
res = -1;
1690+
// Fast path: slot already exists, update in place.
1691+
if (index < old_ce_size) {
1692+
old_slot_value = old_co_extra->ce_extras[index];
1693+
FT_ATOMIC_STORE_PTR_RELEASE(old_co_extra->ce_extras[index], extra);
16441694
goto done;
16451695
}
16461696

1647-
co_extra->ce_size = new_size;
1648-
1649-
// Copy existing extras from the old buffer.
1650-
if (old_size > 0) {
1651-
memcpy(co_extra->ce_extras, old_extra->ce_extras,
1652-
old_size * sizeof(void *));
1653-
}
1654-
1655-
// NULL-initialize new slots.
1656-
for (Py_ssize_t i = old_size; i < new_size; i++) {
1657-
co_extra->ce_extras[i] = NULL;
1658-
}
1659-
1660-
if (old_extra != NULL && index < old_size &&
1661-
old_extra->ce_extras[index] != NULL)
1662-
{
1663-
// Free the old extra value if a free function was registered.
1664-
// We assume the caller ensures no other thread is concurrently
1665-
// using the old value.
1666-
freefunc free = interp->co_extra_freefuncs[index];
1667-
if (free != NULL) {
1668-
free(old_extra->ce_extras[index]);
1669-
}
1670-
}
1671-
1672-
co_extra->ce_extras[index] = extra;
1673-
1674-
// Publish pointer and slot writes to lock-free readers.
1675-
FT_ATOMIC_STORE_PTR_RELEASE(o->co_extra, co_extra);
1676-
1677-
if (old_extra != NULL) {
1697+
// Slow path: buffer needs to grow.
1698+
Py_ssize_t new_ce_size = user_count;
16781699
#ifdef Py_GIL_DISABLED
1679-
// Defer container free for lock-free readers.
1680-
_PyMem_FreeDelayed(old_extra, code_extra_size(old_size));
1700+
// FT build: allocate new buffer and swap; QSBR reclaims the old one.
1701+
result = code_extra_grow_ft(
1702+
co, old_co_extra, old_ce_size, new_ce_size, index, extra);
16811703
#else
1682-
PyMem_Free(old_extra);
1704+
// GIL build: grow with realloc.
1705+
result = code_extra_grow_gil(
1706+
co, old_co_extra, old_ce_size, new_ce_size, index, extra);
16831707
#endif
1684-
}
16851708

16861709
done:;
16871710
Py_END_CRITICAL_SECTION();
1688-
return res;
1711+
if (old_slot_value != NULL) {
1712+
// Free the old slot value if a free function was registered.
1713+
// The caller must ensure no other thread can still access the old
1714+
// value after this overwrite.
1715+
freefunc free_extra = interp->co_extra_freefuncs[index];
1716+
if (free_extra != NULL) {
1717+
free_extra(old_slot_value);
1718+
}
1719+
}
1720+
1721+
return result;
16891722
}
16901723

16911724

0 commit comments

Comments
 (0)