Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ RUN apt-get update \
python3.13-dev \
python3.13-dbg \
python3.13-venv \
python3.14-dev \
python3.14-dbg \
python3.14-venv \
make \
cmake \
gdb \
Expand Down
5 changes: 5 additions & 0 deletions news/279.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fix an issue where the PID offset could not be determined when multiple
subinterpreters were present. Previously, pystack only checked the first
interpreter in the linked list, which was not guaranteed to be the main
interpreter. The fix now iterates over all interpreters and correctly locates
the TID.
3 changes: 3 additions & 0 deletions news/279.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add support for subinterpreters when reporting pure Python stacks. Threads
running in subinterpreters are now identified and grouped by interpreter ID.
Native stack reporting for subinterpreters is not yet supported.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"src/pystack/_pystack.pyx",
"src/pystack/_pystack/corefile.cpp",
"src/pystack/_pystack/elf_common.cpp",
"src/pystack/_pystack/interpreter.cpp",
"src/pystack/_pystack/logging.cpp",
"src/pystack/_pystack/mem.cpp",
"src/pystack/_pystack/process.cpp",
Expand Down
2 changes: 0 additions & 2 deletions src/pystack/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from ._version import __version__
from .traceback_formatter import print_thread

__all__ = [
"__version__",
"print_thread",
]
34 changes: 27 additions & 7 deletions src/pystack/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
from pystack.process import is_gzip

from . import errors
from . import print_thread
from .colors import colored
from .engine import CoreFileAnalyzer
from .engine import NativeReportingMode
from .engine import StackMethod
from .engine import get_process_threads
from .engine import get_process_threads_for_core
from .traceback_formatter import TracebackPrinter

PERMISSION_ERROR_MSG = "Operation not permitted"
NO_SUCH_PROCESS_ERROR_MSG = "No such process"
Expand Down Expand Up @@ -285,14 +285,24 @@ def process_remote(parser: argparse.ArgumentParser, args: argparse.Namespace) ->
if not args.block and args.native_mode != NativeReportingMode.OFF:
parser.error("Native traces are only available in blocking mode")

for thread in get_process_threads(
threads = get_process_threads(
args.pid,
stop_process=args.block,
native_mode=args.native_mode,
locals=args.locals,
method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO,
):
print_thread(thread, args.native_mode)
)

has_multiple_subinterpreters = (
len(set(thread.interpreter_id for thread in threads)) > 1
)

printer = TracebackPrinter(
native_mode=args.native_mode,
include_subinterpreters=has_multiple_subinterpreters,
)
for thread in threads:
printer.print_thread(thread)


def format_psinfo_information(psinfo: Dict[str, Any]) -> str:
Expand Down Expand Up @@ -414,15 +424,25 @@ def process_core(parser: argparse.ArgumentParser, args: argparse.Namespace) -> N
elf_id if elf_id else "<MISSING>",
)

for thread in get_process_threads_for_core(
threads = get_process_threads_for_core(
corefile,
executable,
library_search_path=lib_search_path,
native_mode=args.native_mode,
locals=args.locals,
method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO,
):
print_thread(thread, args.native_mode)
)

has_multiple_subinterpreters = (
len(set(thread.interpreter_id for thread in threads)) > 1
)

printer = TracebackPrinter(
args.native_mode, include_subinterpreters=has_multiple_subinterpreters
)

for thread in threads:
printer.print_thread(thread)


if __name__ == "__main__": # pragma: no cover
Expand Down
104 changes: 98 additions & 6 deletions src/pystack/_pystack.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ from _pystack.elf_common cimport CoreFileAnalyzer as NativeCoreFileAnalyzer
from _pystack.elf_common cimport ProcessAnalyzer as NativeProcessAnalyzer
from _pystack.elf_common cimport SectionInfo
from _pystack.elf_common cimport getSectionInfo
from _pystack.interpreter cimport InterpreterUtils
from _pystack.logging cimport initializePythonLoggerInterface
from _pystack.mem cimport AbstractRemoteMemoryManager
from _pystack.mem cimport MemoryMapInformation as CppMemoryMapInformation
Expand Down Expand Up @@ -65,6 +66,7 @@ from .types import NativeFrame
from .types import PyCodeObject
from .types import PyFrame
from .types import PyThread
from .types import frame_type

LOGGER = logging.getLogger(__file__)

Expand Down Expand Up @@ -462,6 +464,7 @@ cdef object _construct_threads_from_interpreter_state(
bint add_native_traces,
bint resolve_locals,
):
interpreter_id = InterpreterUtils.getInterpreterId(manager, head)
LOGGER.info("Fetching Python threads")
threads = []

Expand All @@ -486,7 +489,9 @@ cdef object _construct_threads_from_interpreter_state(
current_thread.isGilHolder(),
current_thread.isGCCollecting(),
python_version,
interpreter_id,
name=get_thread_name(pid, current_thread.Tid()),
stack_anchor=current_thread.StackAnchor(),
)
)
current_thread = (
Expand All @@ -495,6 +500,78 @@ cdef object _construct_threads_from_interpreter_state(

return threads


def _entry_frame_count(thread: PyThread) -> int:
return sum(1 for frame in thread.all_frames if frame.is_entry)


def _slice_native_stacks_for_same_tid_threads(threads) -> None:
canonical = next((thread for thread in threads if thread.native_frames), None)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I follow this at all. Won't each of the threads in a group have its own copy of exactly the same list of native frames? I understand how this could ever return anything but the first thread in the list (unless native_frames is an empty list for all of them, I guess)

if canonical is None:
return

canonical_frames = list(canonical.native_frames)
eval_positions = [
index
for index, native_frame in enumerate(canonical_frames)
if frame_type(native_frame, canonical.python_version) == NativeFrame.FrameType.EVAL
]
if not eval_positions:
return

entry_counts = [_entry_frame_count(thread) for thread in threads]
if sum(entry_counts) != len(eval_positions):
LOGGER.debug(
"Skipping same-tid native slicing for tid %s due to mismatched counts: "
"entry=%s eval=%s",
threads[0].tid,
sum(entry_counts),
len(eval_positions),
)
return

ordered_threads = sorted(
enumerate(threads),
key=lambda item: (
item[1].stack_anchor is None, # Everything with a stack anchor before everything without one
-(item[1].stack_anchor or 0), # The stack anchor in descending order
item[0], # The index of the item in the list in ascending order
),
)

cursor = 0
for _, thread in ordered_threads:
required_eval_frames = _entry_frame_count(thread)
if required_eval_frames == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How could this happen? If there is a Python stack, there will always be at least 1 entry frame, right?

I guess this could happen if we've acquired the GIL (so there's a thread state) but we don't yet have any Python stack for that thread state?

thread.native_frames = []
continue

group_start = cursor
group_end = cursor + required_eval_frames
prev_eval = eval_positions[group_start - 1] if group_start > 0 else -1
next_eval = (
eval_positions[group_end]
if group_end < len(eval_positions)
else len(canonical_frames)
Comment on lines +553 to +555
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, how could group_end < len(eval_positions) be false, given that we checked above that sum(entry_counts) == len(eval_positions)? We know that things pair up correctly, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is handling the end of the positions, when it is False it means we want the rest of the canonical_frames

# for only 1 thread
required_eval_frames = 1
eval_positions = [0]

group_start = 0
group_end = 0 + required_eval_frames # = 1

# therefore group_end == len(eval_positions) as we are at end end
next_eval = 1

)
thread.native_frames = canonical_frames[prev_eval + 1 : next_eval]
cursor = group_end


def _normalize_python_threads(threads, native_mode: NativeReportingMode):
if native_mode == NativeReportingMode.OFF:
return threads

threads_by_tid = {}
for thread in threads:
threads_by_tid.setdefault(thread.tid, []).append(thread)

for group in threads_by_tid.values():
if len(group) <= 1:
continue
_slice_native_stacks_for_same_tid_threads(group)
return threads

cdef object _construct_os_thread(
shared_ptr[AbstractProcessManager] manager, int pid, int tid
):
Expand Down Expand Up @@ -622,7 +699,8 @@ def _get_process_threads(
)

all_tids = list(manager.get().Tids())
if head:
threads = []
while head:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a minor nit, but - this winds up printing the subinterpreters from the most recently spawned to the least, printing the main interpreter last. I wonder if it would be more useful to reverse the list of interpreters, so that the main interpreter's threads are printed first...

What do you think, @pablogsal ?

add_native_traces = native_mode != NativeReportingMode.OFF
for thread in _construct_threads_from_interpreter_state(
manager,
Expand All @@ -634,7 +712,11 @@ def _get_process_threads(
):
if thread.tid in all_tids:
all_tids.remove(thread.tid)
yield thread
threads.append(thread)
head = InterpreterUtils.getNextInterpreter(manager, head)

for thread in _normalize_python_threads(threads, native_mode):
yield thread

if native_mode == NativeReportingMode.ALL:
yield from _construct_os_threads(manager, pid, all_tids)
Expand Down Expand Up @@ -768,15 +850,25 @@ def _get_process_threads_for_core(
)

all_tids = list(manager.get().Tids())
threads = []

if head:
native = native_mode in {NativeReportingMode.PYTHON, NativeReportingMode.ALL}
while head:
add_native_traces = native_mode != NativeReportingMode.OFF
for thread in _construct_threads_from_interpreter_state(
manager, head, pymanager.pid, pymanager.python_version, native, locals
manager,
head,
pymanager.pid,
pymanager.python_version,
add_native_traces,
locals,
):
if thread.tid in all_tids:
all_tids.remove(thread.tid)
yield thread
threads.append(thread)
head = InterpreterUtils.getNextInterpreter(manager, head)

for thread in _normalize_python_threads(threads, native_mode):
yield thread

if native_mode == NativeReportingMode.ALL:
yield from _construct_os_threads(manager, pymanager.pid, all_tids)
3 changes: 2 additions & 1 deletion src/pystack/_pystack/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_library(_pystack STATIC
pythread.cpp
version.cpp
elf_common.cpp
pytypes.cpp)
pytypes.cpp
interpreter.cpp)
set_property(TARGET _pystack PROPERTY POSITION_INDEPENDENT_CODE ON)
include_directories("." "cpython" ${PYTHON_INCLUDE_DIRS})
4 changes: 2 additions & 2 deletions src/pystack/_pystack/cpython/interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,10 @@ struct _gil_runtime_state
int locked;
unsigned long switch_number;
pthread_cond_t cond;
pthread_cond_t mutex;
pthread_mutex_t mutex;
#ifdef FORCE_SWITCHING
pthread_cond_t switch_cond;
pthread_cond_t switch_mutex;
pthread_mutex_t switch_mutex;
#endif
};

Expand Down
36 changes: 36 additions & 0 deletions src/pystack/_pystack/interpreter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <memory>

#include "interpreter.h"
#include "logging.h"
#include "process.h"
#include "structure.h"
#include "version.h"

namespace pystack {

remote_addr_t
InterpreterUtils::getNextInterpreter(
const std::shared_ptr<const AbstractProcessManager>& manager,
remote_addr_t interpreter_addr)
{
Structure<py_is_v> is(manager, interpreter_addr);
return is.getField(&py_is_v::o_next);
}

int64_t
InterpreterUtils::getInterpreterId(
const std::shared_ptr<const AbstractProcessManager>& manager,
remote_addr_t interpreter_addr)
{
if (!manager->versionIsAtLeast(3, 7)) {
// No support for subinterpreters so the only interpreter is ID 0.
return 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm. This feels like the wrong way to handle this... It's technically possible to have multiple interpreters even for 3.6, though I don't think we'd support it particularly well if someone did it. But perhaps we should just use the address, or something like that, as a reasonable fallback? Maybe just

Suggested change
return 0;
return interpreter_addr;

Though we might want the function to return a uint64_t instead of an int64_t in that case...

Or maybe we should just make up a synthetic ID, counting up from 0...

I guess this is fine for now, maybe we can just punt on it unless and until someone complains...

}

Structure<py_is_v> is(manager, interpreter_addr);
int64_t id_value = is.getField(&py_is_v::o_id);

return id_value;
}

} // namespace pystack
24 changes: 24 additions & 0 deletions src/pystack/_pystack/interpreter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#pragma once

#include <cstdint>
#include <memory>

#include "mem.h"
#include "process.h"

namespace pystack {

class InterpreterUtils
{
public:
// Static Methods
static remote_addr_t getNextInterpreter(
const std::shared_ptr<const AbstractProcessManager>& manager,
remote_addr_t interpreter_addr);

static int64_t getInterpreterId(
const std::shared_ptr<const AbstractProcessManager>& manager,
remote_addr_t interpreter_addr);
};

} // namespace pystack
13 changes: 13 additions & 0 deletions src/pystack/_pystack/interpreter.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from _pystack.mem cimport remote_addr_t
from _pystack.process cimport AbstractProcessManager
from libc.stdint cimport int64_t
from libcpp.memory cimport shared_ptr


cdef extern from "interpreter.h" namespace "pystack":
cdef cppclass InterpreterUtils:
@staticmethod
remote_addr_t getNextInterpreter(shared_ptr[AbstractProcessManager] manager, remote_addr_t interpreter_addr) except +

@staticmethod
int64_t getInterpreterId(shared_ptr[AbstractProcessManager] manager, remote_addr_t interpreter_addr) except +
1 change: 1 addition & 0 deletions src/pystack/_pystack/process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,7 @@ AbstractProcessManager::copyDebugOffsets(Structure<py_runtime_v>& py_runtime, py
set_offset(py_is.o_sysdict, &py_runtime_v::o_dbg_off_interpreter_state_sysdict);
set_offset(py_is.o_builtins, &py_runtime_v::o_dbg_off_interpreter_state_builtins);
set_offset(py_is.o_gil_runtime_state, &py_runtime_v::o_dbg_off_interpreter_state_ceval_gil);
set_offset(py_is.o_id, &py_runtime_v::o_dbg_off_interpreter_state_id);

set_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size);
set_offset(py_thread.o_prev, &py_runtime_v::o_dbg_off_thread_state_prev);
Expand Down
Loading
Loading