Skip to content

Commit 46a0b2c

Browse files
committed
Address review feedback
1 parent 7a85c9a commit 46a0b2c

7 files changed

Lines changed: 299 additions & 97 deletions

File tree

Lib/profiling/sampling/sample.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,37 @@ def _print_unwinder_stats(self):
296296
print(f" Hits: {code_hits:n} ({ANSIColors.GREEN}{fmt(code_hits_pct)}%{ANSIColors.RESET})")
297297
print(f" Misses: {code_misses:n} ({ANSIColors.RED}{fmt(code_misses_pct)}%{ANSIColors.RESET})")
298298

299+
# Batched remote read stats
300+
batched_attempts = stats.get('batched_read_attempts', 0)
301+
batched_successes = stats.get('batched_read_successes', 0)
302+
batched_misses = stats.get('batched_read_misses', 0)
303+
segments_requested = stats.get('batched_read_segments_requested', 0)
304+
segments_completed = stats.get('batched_read_segments_completed', 0)
305+
if batched_attempts > 0 or segments_requested > 0:
306+
batched_success_rate = stats.get('batched_read_success_rate', 0.0)
307+
batched_miss_rate = (
308+
(batched_misses / batched_attempts * 100)
309+
if batched_attempts > 0 else 0
310+
)
311+
segment_completion_rate = stats.get(
312+
'batched_read_segment_completion_rate', 0.0
313+
)
314+
315+
print(f" {ANSIColors.CYAN}Batched Reads:{ANSIColors.RESET}")
316+
print(f" Attempts: {batched_attempts:n}")
317+
print(
318+
f" Successes: {batched_successes:n} "
319+
f"({ANSIColors.GREEN}{fmt(batched_success_rate)}%{ANSIColors.RESET})"
320+
)
321+
print(
322+
f" Misses: {batched_misses:n} "
323+
f"({ANSIColors.RED}{fmt(batched_miss_rate)}%{ANSIColors.RESET})"
324+
)
325+
print(
326+
f" Segments read: {segments_completed:n}/{segments_requested:n} "
327+
f"({ANSIColors.GREEN}{fmt(segment_completion_rate)}%{ANSIColors.RESET})"
328+
)
329+
299330
# Memory operations
300331
memory_reads = stats.get('memory_reads', 0)
301332
memory_bytes = stats.get('memory_bytes_read', 0)

Lib/test/test_external_inspection.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3767,6 +3767,13 @@ def test_get_stats(self):
37673767
"frames_read_from_cache",
37683768
"frames_read_from_memory",
37693769
"frame_cache_hit_rate",
3770+
"batched_read_attempts",
3771+
"batched_read_successes",
3772+
"batched_read_misses",
3773+
"batched_read_segments_requested",
3774+
"batched_read_segments_completed",
3775+
"batched_read_success_rate",
3776+
"batched_read_segment_completion_rate",
37703777
]
37713778
for key in expected_keys:
37723779
self.assertIn(key, stats)

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 65 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,23 @@ typedef struct {
233233
PyObject *frame_list; // owned reference, NULL if empty
234234
} FrameCacheEntry;
235235

236+
/*
 * Number of InterpreterThreadCacheEntry slots kept per unwinder.
 *
 * Must be a positive power of two.  The bit trick (N & (N - 1)) == 0 is
 * also true for N == 0, so positivity is checked explicitly; otherwise a
 * zero-sized cache would slip through the guard.
 */
#define INTERPRETER_THREAD_CACHE_SIZE 32
#if (INTERPRETER_THREAD_CACHE_SIZE <= 0) || \
    ((INTERPRETER_THREAD_CACHE_SIZE & (INTERPRETER_THREAD_CACHE_SIZE - 1)) != 0)
#  error "INTERPRETER_THREAD_CACHE_SIZE must be a power of two"
#endif
240+
241+
/*
 * One slot of the unwinder's cached_tstates[] array: a remote interpreter
 * address paired with a remote thread-state address.
 *
 * NOTE(review): presumably thread_state_addr is the thread predicted to be
 * read first for that interpreter in a batched read -- confirm against the
 * code that fills and looks up the cache.
 */
typedef struct {
    uintptr_t interpreter_addr;     /* remote address of the interpreter */
    uintptr_t thread_state_addr;    /* remote address of the cached thread state */
} InterpreterThreadCacheEntry;
245+
246+
/*
 * Optional already-read thread-state and frame buffers, each tagged with
 * the remote address it was read from.
 *
 * Consumers use a buffer only when its pointer is non-NULL and its recorded
 * address matches the address they are about to read.  The struct does not
 * own the buffers it points to.
 */
typedef struct {
    const char *tstate;         /* already-read thread-state bytes, or NULL */
    uintptr_t   tstate_addr;    /* remote address `tstate` was read from */
    const char *frame;          /* already-read frame bytes, or NULL */
    uintptr_t   frame_addr;     /* remote address `frame` was read from */
} RemoteReadPrefetch;
252+
236253
/* Statistics for profiling performance analysis */
237254
typedef struct {
238255
uint64_t total_samples; // Total number of get_stack_trace calls
@@ -246,6 +263,11 @@ typedef struct {
246263
uint64_t code_object_cache_hits; // Code object cache hits
247264
uint64_t code_object_cache_misses; // Code object cache misses
248265
uint64_t stale_cache_invalidations; // Times stale entries were cleared
266+
uint64_t batched_read_attempts; // Batched remote-read attempts
267+
uint64_t batched_read_successes; // Attempts that read all requested segments
268+
uint64_t batched_read_misses; // Attempts that fell back or partially read
269+
uint64_t batched_read_segments_requested; // Segments requested by batched reads
270+
uint64_t batched_read_segments_completed; // Segments completed by batched reads
249271
} UnwinderStats;
250272

251273
/* Stats tracking macros - no-op when stats collection is disabled */
@@ -255,6 +277,46 @@ typedef struct {
255277
#define STATS_ADD(unwinder, field, val) \
256278
do { if ((unwinder)->collect_stats) (unwinder)->stats.field += (val); } while(0)
257279

280+
/*
 * Record the outcome of one batched remote read.
 *
 *   unwinder   pointer to an object with `collect_stats` and `stats` members
 *   requested  number of segments the batched read asked for
 *   completed  number of segments that were fully read
 *
 * Bumps batched_read_attempts, adds both counts to their running totals and
 * classifies the attempt as a success (completed == requested) or a miss.
 * No-op when stats collection is disabled; in that case `requested` and
 * `completed` are not evaluated at all.
 *
 * `requested` and `completed` are each evaluated exactly once, so arguments
 * with side effects are safe.  `unwinder` is still expanded several times
 * and must be side-effect free.
 */
#define STATS_BATCHED_READ(unwinder, requested, completed) \
    do { \
        if ((unwinder)->collect_stats) { \
            const uint64_t _sbr_requested = (uint64_t)(requested); \
            const uint64_t _sbr_completed = (uint64_t)(completed); \
            (unwinder)->stats.batched_read_attempts++; \
            (unwinder)->stats.batched_read_segments_requested += _sbr_requested; \
            (unwinder)->stats.batched_read_segments_completed += _sbr_completed; \
            if (_sbr_completed == _sbr_requested) { \
                (unwinder)->stats.batched_read_successes++; \
            } \
            else { \
                (unwinder)->stats.batched_read_misses++; \
            } \
        } \
    } while (0)
294+
295+
static inline int
296+
_Py_RemoteDebug_CountCompletedSegments(
297+
const _Py_RemoteReadSegment *segments,
298+
int nsegs,
299+
Py_ssize_t nread)
300+
{
301+
if (nread < 0) {
302+
return 0;
303+
}
304+
305+
int completed = 0;
306+
Py_ssize_t bytes_needed = 0;
307+
for (int i = 0; i < nsegs; i++) {
308+
if (segments[i].size > (size_t)(PY_SSIZE_T_MAX - bytes_needed)) {
309+
break;
310+
}
311+
bytes_needed += (Py_ssize_t)segments[i].size;
312+
if (nread < bytes_needed) {
313+
break;
314+
}
315+
completed++;
316+
}
317+
return completed;
318+
}
319+
258320
typedef struct {
259321
PyTypeObject *RemoteDebugging_Type;
260322
PyTypeObject *TaskInfo_Type;
@@ -306,7 +368,7 @@ typedef struct {
306368
int cache_frames;
307369
int collect_stats; // whether to collect statistics
308370
uint32_t stale_invalidation_counter; // counter for throttling frame_cache_invalidate_stale
309-
uintptr_t cached_tstate_addr; // predicted first thread for batched reads
371+
InterpreterThreadCacheEntry cached_tstates[INTERPRETER_THREAD_CACHE_SIZE];
310372
RemoteDebuggingState *cached_state;
311373
FrameCacheEntry *frame_cache; // preallocated array of FRAME_CACHE_MAX_THREADS entries
312374
UnwinderStats stats; // statistics for performance analysis
@@ -372,8 +434,7 @@ typedef struct {
372434
uintptr_t last_profiled_frame; // Last cached frame (0 if no cache)
373435
StackChunkList *chunks; // Pre-copied stack chunks
374436
int skip_first_frame; // Skip frame_addr itself (continue from its caller)
375-
const char *prefetched_frame; // Optional already-read frame buffer
376-
uintptr_t prefetched_frame_addr; // Remote address for prefetched_frame
437+
RemoteReadPrefetch prefetch; // Optional already-read thread/frame buffers
377438

378439
/* Outputs */
379440
PyObject *frame_info; // List to append FrameInfo objects
@@ -616,10 +677,7 @@ extern PyObject* unwind_stack_for_thread(
616677
uintptr_t gil_holder_tstate,
617678
uintptr_t gc_frame,
618679
uintptr_t main_thread_tstate,
619-
const char *prefetched_tstate,
620-
uintptr_t prefetched_tstate_addr,
621-
const char *prefetched_frame,
622-
uintptr_t prefetched_frame_addr
680+
const RemoteReadPrefetch *prefetch
623681
);
624682

625683
/* Thread stopping functions (for blocking mode) */

Modules/_remote_debugging/clinic/module.c.h

Lines changed: 8 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/_remote_debugging/frames.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,9 @@ process_frame_chain(
332332
{
333333
uintptr_t address_of_code_object = 0;
334334
int parse_result;
335-
if (ctx->prefetched_frame && ctx->prefetched_frame_addr == frame_addr) {
335+
if (ctx->prefetch.frame && ctx->prefetch.frame_addr == frame_addr) {
336336
parse_result = parse_frame_buffer(
337-
unwinder, &frame, ctx->prefetched_frame,
337+
unwinder, &frame, ctx->prefetch.frame,
338338
&address_of_code_object, &next_frame_addr);
339339
}
340340
else {
@@ -530,9 +530,9 @@ try_full_cache_hit(
530530
uintptr_t code_object_addr = 0;
531531
uintptr_t previous_frame = 0;
532532
int parse_result;
533-
if (ctx->prefetched_frame && ctx->prefetched_frame_addr == ctx->frame_addr) {
533+
if (ctx->prefetch.frame && ctx->prefetch.frame_addr == ctx->frame_addr) {
534534
parse_result = parse_frame_buffer(unwinder, &current_frame,
535-
ctx->prefetched_frame,
535+
ctx->prefetch.frame,
536536
&code_object_addr, &previous_frame);
537537
}
538538
else {

0 commit comments

Comments
 (0)