Skip to content

Commit 2995187

Browse files
committed
cleanp
1 parent f7fe3be commit 2995187

3 files changed

Lines changed: 50 additions & 41 deletions

File tree

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern "C" {
3030
#include "internal/pycore_llist.h" // struct llist_node
3131
#include "internal/pycore_long.h" // _PyLong_GetZero
3232
#include "internal/pycore_pyerrors.h" // _PyErr_FormatFromCause
33+
#include "internal/pycore_pyhash.h" // _Py_HashPointerRaw
3334
#include "internal/pycore_stackref.h" // Py_TAG_BITS
3435
#include "../../Python/remote_debug.h"
3536

@@ -270,16 +271,22 @@ typedef struct {
270271
uint64_t batched_read_segments_completed; // Segments completed by batched reads
271272
} UnwinderStats;
272273

274+
#if defined(__GNUC__) || defined(__clang__)
275+
# define REMOTE_DEBUG_UNLIKELY(value) __builtin_expect(!!(value), 0)
276+
#else
277+
# define REMOTE_DEBUG_UNLIKELY(value) (value)
278+
#endif
279+
273280
/* Stats tracking macros - no-op when stats collection is disabled */
274281
#define STATS_INC(unwinder, field) \
275-
do { if ((unwinder)->collect_stats) (unwinder)->stats.field++; } while(0)
282+
do { if (REMOTE_DEBUG_UNLIKELY((unwinder)->collect_stats)) (unwinder)->stats.field++; } while(0)
276283

277284
#define STATS_ADD(unwinder, field, val) \
278-
do { if ((unwinder)->collect_stats) (unwinder)->stats.field += (val); } while(0)
285+
do { if (REMOTE_DEBUG_UNLIKELY((unwinder)->collect_stats)) (unwinder)->stats.field += (val); } while(0)
279286

280287
#define STATS_BATCHED_READ(unwinder, requested, completed) \
281288
do { \
282-
if ((unwinder)->collect_stats) { \
289+
if (REMOTE_DEBUG_UNLIKELY((unwinder)->collect_stats)) { \
283290
(unwinder)->stats.batched_read_attempts++; \
284291
(unwinder)->stats.batched_read_segments_requested += (uint64_t)(requested); \
285292
(unwinder)->stats.batched_read_segments_completed += (uint64_t)(completed); \
@@ -292,31 +299,6 @@ typedef struct {
292299
} \
293300
} while(0)
294301

295-
static inline int
296-
_Py_RemoteDebug_CountCompletedSegments(
297-
const _Py_RemoteReadSegment *segments,
298-
int nsegs,
299-
Py_ssize_t nread)
300-
{
301-
if (nread < 0) {
302-
return 0;
303-
}
304-
305-
int completed = 0;
306-
Py_ssize_t bytes_needed = 0;
307-
for (int i = 0; i < nsegs; i++) {
308-
if (segments[i].size > (size_t)(PY_SSIZE_T_MAX - bytes_needed)) {
309-
break;
310-
}
311-
bytes_needed += (Py_ssize_t)segments[i].size;
312-
if (nread < bytes_needed) {
313-
break;
314-
}
315-
completed++;
316-
}
317-
return completed;
318-
}
319-
320302
typedef struct {
321303
PyTypeObject *RemoteDebugging_Type;
322304
PyTypeObject *TaskInfo_Type;
@@ -368,10 +350,12 @@ typedef struct {
368350
int cache_frames;
369351
int collect_stats; // whether to collect statistics
370352
uint32_t stale_invalidation_counter; // counter for throttling frame_cache_invalidate_stale
371-
InterpreterThreadCacheEntry cached_tstates[INTERPRETER_THREAD_CACHE_SIZE];
353+
uintptr_t cached_tstate_interpreter_addr; // hot last-interpreter prediction
354+
uintptr_t cached_tstate_addr; // hot first-thread prediction
372355
RemoteDebuggingState *cached_state;
373356
FrameCacheEntry *frame_cache; // preallocated array of FRAME_CACHE_MAX_THREADS entries
374357
UnwinderStats stats; // statistics for performance analysis
358+
InterpreterThreadCacheEntry cached_tstates[INTERPRETER_THREAD_CACHE_SIZE];
375359
#ifdef Py_GIL_DISABLED
376360
uint32_t tlbc_generation;
377361
_Py_hashtable_t *tlbc_cache;

Modules/_remote_debugging/module.c

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
361361
self->cache_frames = cache_frames;
362362
self->collect_stats = stats;
363363
self->stale_invalidation_counter = 0;
364+
self->cached_tstate_interpreter_addr = 0;
365+
self->cached_tstate_addr = 0;
364366
memset(self->cached_tstates, 0, sizeof(self->cached_tstates));
365367
self->debug = debug;
366368
self->only_active_thread = only_active_thread;
@@ -478,13 +480,10 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
478480
static size_t
479481
interpreter_thread_cache_index(uintptr_t interpreter_addr)
480482
{
481-
// The interpreter ID lives in PyInterpreterState, which is the state we are
482-
// trying to prefetch. At this point the cheap stable key is the remote
483-
// interpreter address, so use it for a small direct-mapped prediction cache.
484-
// The full address is stored in each entry and checked on lookup, so hash
485-
// collisions are detected as misses; storing a colliding entry only replaces
486-
// the previous prediction and cannot return the wrong thread state.
487-
return ((interpreter_addr >> 4) ^ (interpreter_addr >> 12))
483+
// Direct-mapped table indexed by the remote interpreter address. Each entry
484+
// stores the full address and verifies it on lookup, so hash collisions
485+
// degrade to misses and cannot return a wrong tstate.
486+
return (size_t)_Py_HashPointerRaw((const void *)interpreter_addr)
488487
& (INTERPRETER_THREAD_CACHE_SIZE - 1);
489488
}
490489

@@ -497,9 +496,15 @@ get_cached_tstate_for_interpreter(
497496
return 0;
498497
}
499498

499+
if (self->cached_tstate_interpreter_addr == interpreter_addr) {
500+
return self->cached_tstate_addr;
501+
}
502+
500503
InterpreterThreadCacheEntry *entry =
501504
&self->cached_tstates[interpreter_thread_cache_index(interpreter_addr)];
502505
if (entry->interpreter_addr == interpreter_addr) {
506+
self->cached_tstate_interpreter_addr = interpreter_addr;
507+
self->cached_tstate_addr = entry->thread_state_addr;
503508
return entry->thread_state_addr;
504509
}
505510
return 0;
@@ -515,6 +520,9 @@ set_cached_tstate_for_interpreter(
515520
return;
516521
}
517522

523+
self->cached_tstate_interpreter_addr = interpreter_addr;
524+
self->cached_tstate_addr = thread_state_addr;
525+
518526
InterpreterThreadCacheEntry *entry =
519527
&self->cached_tstates[interpreter_thread_cache_index(interpreter_addr)];
520528
entry->interpreter_addr = interpreter_addr;
@@ -584,8 +592,19 @@ read_interp_state_and_maybe_thread_frame(
584592
int nsegs = prefetch->frame_addr != 0 ? 3 : 2;
585593
Py_ssize_t nread = _Py_RemoteDebug_BatchedReadRemoteMemory(
586594
&unwinder->handle, segments, nsegs);
587-
int completed = _Py_RemoteDebug_CountCompletedSegments(
588-
segments, nsegs, nread);
595+
int completed = 0;
596+
if (nread >= (Py_ssize_t)INTERP_STATE_BUFFER_SIZE) {
597+
completed = 1;
598+
Py_ssize_t with_tstate = (Py_ssize_t)INTERP_STATE_BUFFER_SIZE
599+
+ (Py_ssize_t)tstate_size;
600+
if (nread >= with_tstate) {
601+
completed = 2;
602+
}
603+
if (nsegs == 3
604+
&& nread == with_tstate + (Py_ssize_t)SIZEOF_INTERP_FRAME) {
605+
completed = 3;
606+
}
607+
}
589608
STATS_BATCHED_READ(unwinder, nsegs, completed);
590609
if (completed >= 1) {
591610
if (completed >= 2) {

Modules/_remote_debugging/threads.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,13 @@ read_thread_state_and_maybe_frame(
307307
};
308308
Py_ssize_t nread = _Py_RemoteDebug_BatchedReadRemoteMemory(
309309
&unwinder->handle, segments, 2);
310-
int completed = _Py_RemoteDebug_CountCompletedSegments(segments, 2, nread);
310+
int completed = 0;
311+
if (nread >= (Py_ssize_t)tstate_size) {
312+
completed = 1;
313+
if (nread == (Py_ssize_t)(tstate_size + SIZEOF_INTERP_FRAME)) {
314+
completed = 2;
315+
}
316+
}
311317
STATS_BATCHED_READ(unwinder, 2, completed);
312318
if (completed >= 1) {
313319
*frame_read = completed == 2;
@@ -335,9 +341,9 @@ unwind_stack_for_thread(
335341
char ts[SIZEOF_THREAD_STATE];
336342
char local_prefetched_frame[SIZEOF_INTERP_FRAME];
337343
RemoteReadPrefetch ctx_prefetch = {0};
338-
if (prefetch && prefetch->tstate && prefetch->tstate_addr == *current_tstate) {
344+
if (prefetch->tstate && prefetch->tstate_addr == *current_tstate) {
339345
memcpy(ts, prefetch->tstate, (size_t)unwinder->debug_offsets.thread_state.size);
340-
if (prefetch->frame && prefetch->frame_addr != 0) {
346+
if (prefetch->frame) {
341347
ctx_prefetch.frame = prefetch->frame;
342348
ctx_prefetch.frame_addr = prefetch->frame_addr;
343349
}

0 commit comments

Comments
 (0)