Skip to content

Commit 5e2993f

Browse files
committed
[3.14] gh-144438: Fix false sharing between QSBR and tlbc_index (gh-144554)
Align the QSBR thread state array to a 64-byte cache line boundary and add padding at the end of _PyThreadStateImpl.

Depending on heap layout, the QSBR array could end up sharing a cache line with a thread's tlbc_index, causing QSBR quiescent state updates to contend with reads of tlbc_index in RESUME_CHECK. This is sensitive to earlier allocations during interpreter init and can appear or disappear with seemingly unrelated changes.

Either change alone is sufficient to fix the specific issue, but both are worthwhile to avoid similar problems in the future.

(cherry picked from commit 6577d87)

Co-authored-by: Sam Gross <colesbury@gmail.com>
1 parent b1accd5 commit 5e2993f

File tree

4 files changed

+23
-7
lines changed

4 files changed

+23
-7
lines changed

Include/internal/pycore_qsbr.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,9 @@ struct _qsbr_shared {
8383
// Minimum observed read sequence of all QSBR thread states
8484
uint64_t rd_seq;
8585

86-
// Array of QSBR thread states.
86+
// Array of QSBR thread states (aligned to 64 bytes).
8787
struct _qsbr_pad *array;
88+
void *array_raw; // raw allocation pointer (for free)
8889
Py_ssize_t size;
8990

9091
// Freelist of unused _qsbr_thread_states (protected by mutex)

Include/internal/pycore_tstate.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ typedef struct _PyThreadStateImpl {
8080
uintptr_t c_stack_init_base;
8181
uintptr_t c_stack_init_top;
8282

83+
#ifdef Py_GIL_DISABLED
84+
// gh-144438: Add padding to ensure that the fields above don't share a
85+
// cache line with other allocations.
86+
char __padding[64];
87+
#endif
8388
} _PyThreadStateImpl;
8489

8590
#ifdef __cplusplus
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Align the QSBR thread state array to a 64-byte cache line boundary to
2+
avoid false sharing in the :term:`free-threaded build`.

Python/qsbr.c

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,22 +84,29 @@ grow_thread_array(struct _qsbr_shared *shared)
8484
new_size = MIN_ARRAY_SIZE;
8585
}
8686

87-
struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array));
88-
if (array == NULL) {
87+
// Overallocate by 63 bytes so we can align to a 64-byte boundary.
88+
// This avoids potential false sharing between the first entry and other
89+
// allocations.
90+
size_t alignment = 64;
91+
size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + alignment - 1;
92+
void *raw = PyMem_RawCalloc(1, alloc_size);
93+
if (raw == NULL) {
8994
return -1;
9095
}
96+
struct _qsbr_pad *array = _Py_ALIGN_UP(raw, alignment);
9197

92-
struct _qsbr_pad *old = shared->array;
93-
if (old != NULL) {
98+
void *old_raw = shared->array_raw;
99+
if (shared->array != NULL) {
94100
memcpy(array, shared->array, shared->size * sizeof(*array));
95101
}
96102

97103
shared->array = array;
104+
shared->array_raw = raw;
98105
shared->size = new_size;
99106
shared->freelist = NULL;
100107
initialize_new_array(shared);
101108

102-
PyMem_RawFree(old);
109+
PyMem_RawFree(old_raw);
103110
return 0;
104111
}
105112

@@ -256,8 +263,9 @@ void
256263
_Py_qsbr_fini(PyInterpreterState *interp)
257264
{
258265
struct _qsbr_shared *shared = &interp->qsbr;
259-
PyMem_RawFree(shared->array);
266+
PyMem_RawFree(shared->array_raw);
260267
shared->array = NULL;
268+
shared->array_raw = NULL;
261269
shared->size = 0;
262270
shared->freelist = NULL;
263271
}

0 commit comments

Comments
 (0)