From 652a3e7480bc58cd07907080088af4cf2e563489 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 15 Jun 2026 11:36:08 +0200 Subject: [PATCH 1/2] Add critical section guards to thread-sensitive accessors Protect accessors with cython critical sections so free-threaded execution avoids races when reading cached state. At least one of these was noticed by free-threaded test runs. I am not certain that they will guarantee identity returns, but I am pretty sure this will make them thread-safe. (The reason being that, at least on non-free-threaded builds, they may release the GIL or, on free-threaded builds, do API calls that release the critical section temporarily.) About the `.pyi` fixes: I think these are incorrect, but `cython` is already used in other places and shouldn't be a runtime/typing requirement at all. --- cuda_core/cuda/core/_memory/_buffer.pyi | 2 ++ cuda_core/cuda/core/_memory/_buffer.pyx | 2 ++ cuda_core/cuda/core/_memory/_memory_pool.pyi | 2 ++ cuda_core/cuda/core/_memory/_memory_pool.pyx | 2 ++ cuda_core/cuda/core/_memoryview.pyx | 4 ++++ cuda_core/cuda/core/_module.pyi | 3 +++ cuda_core/cuda/core/_module.pyx | 4 ++++ 7 files changed, 19 insertions(+) diff --git a/cuda_core/cuda/core/_memory/_buffer.pyi b/cuda_core/cuda/core/_memory/_buffer.pyi index 728853c4bc7..7118a3a1e07 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pyi +++ b/cuda_core/cuda/core/_memory/_buffer.pyi @@ -2,6 +2,7 @@ from __future__ import annotations +import cython from cuda.core._memory._device_memory_resource import DeviceMemoryResource from cuda.core._memory._ipc import IPCBufferDescriptor from cuda.core._memory._pinned_memory_resource import PinnedMemoryResource @@ -88,6 +89,7 @@ class Buffer: """ @property + @cython.critical_section def ipc_descriptor(self) -> IPCBufferDescriptor: """Descriptor for sharing this buffer with other processes.""" diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx index 88f9054385a..5f6e3fe84d8 100644 --- 
a/cuda_core/cuda/core/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -191,6 +191,7 @@ cdef class Buffer: return _ipc.Buffer_from_ipc_descriptor(cls, mr, ipc_descriptor, stream) @property + @cython.critical_section def ipc_descriptor(self) -> IPCBufferDescriptor: """Descriptor for sharing this buffer with other processes.""" if self._ipc_data is None: @@ -445,6 +446,7 @@ cdef class Buffer: # Memory Attribute Query Helpers # ------------------------------ +@cython.critical_section cdef inline void _init_mem_attrs(Buffer self): """Initialize memory attributes by querying the pointer.""" if not self._mem_attrs_inited: diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pyi b/cuda_core/cuda/core/_memory/_memory_pool.pyi index 3d15d9f679f..7f8c64aedda 100644 --- a/cuda_core/cuda/core/_memory/_memory_pool.pyi +++ b/cuda_core/cuda/core/_memory/_memory_pool.pyi @@ -4,6 +4,7 @@ from __future__ import annotations import uuid +import cython from cuda.core._memory._buffer import Buffer, MemoryResource from cuda.core._stream import Stream from cuda.core.graph import GraphBuilder @@ -97,6 +98,7 @@ class _MemPool(MemoryResource): """ @property + @cython.critical_section def attributes(self) -> _MemPoolAttributes: """Memory pool attributes.""" diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pyx b/cuda_core/cuda/core/_memory/_memory_pool.pyx index c6276f0f3de..ddcac2d6063 100644 --- a/cuda_core/cuda/core/_memory/_memory_pool.pyx +++ b/cuda_core/cuda/core/_memory/_memory_pool.pyx @@ -4,6 +4,7 @@ from __future__ import annotations +cimport cython from libc.limits cimport ULLONG_MAX from libc.stdint cimport uintptr_t from libc.string cimport memset @@ -177,6 +178,7 @@ cdef class _MemPool(MemoryResource): _MP_deallocate(self, ptr, size, s) @property + @cython.critical_section def attributes(self) -> _MemPoolAttributes: """Memory pool attributes.""" if self._attributes is None: diff --git a/cuda_core/cuda/core/_memoryview.pyx 
b/cuda_core/cuda/core/_memoryview.pyx index c65107ae273..665128b65b4 100644 --- a/cuda_core/cuda/core/_memoryview.pyx +++ b/cuda_core/cuda/core/_memoryview.pyx @@ -4,6 +4,7 @@ from __future__ import annotations +cimport cython from ._dlpack cimport * from ._dlpack import classify_dl_device from libc.stdint cimport intptr_t @@ -539,6 +540,7 @@ cdef class StridedMemoryView: + f" readonly={self.readonly},\n" + f" exporting_obj={get_simple_repr(self.exporting_obj)})") + @cython.critical_section cdef inline _StridedLayout get_layout(self): if self._layout is None: if self.dl_tensor: @@ -549,6 +551,7 @@ cdef class StridedMemoryView: raise ValueError("Cannot infer layout from the exporting object") return self._layout + @cython.critical_section cdef inline object get_buffer(self): """ Returns Buffer instance with the underlying data. @@ -562,6 +565,7 @@ cdef class StridedMemoryView: self._buffer = Buffer.from_handle(self.ptr, 0, owner=self.exporting_obj) return self._buffer + @cython.critical_section cdef inline object get_dtype(self): if self._dtype is None: if self.dl_tensor != NULL: diff --git a/cuda_core/cuda/core/_module.pyi b/cuda_core/cuda/core/_module.pyi index caf6b09b717..5125b99131a 100644 --- a/cuda_core/cuda/core/_module.pyi +++ b/cuda_core/cuda/core/_module.pyi @@ -5,6 +5,7 @@ from __future__ import annotations from collections import namedtuple from os import PathLike +import cython from cuda.core._device import Device from cuda.core._launch_config import LaunchConfig from cuda.core._stream import Stream @@ -253,6 +254,7 @@ class Kernel: ... 
@property + @cython.critical_section def attributes(self) -> KernelAttributes: """Get the read-only attributes of this kernel.""" @@ -265,6 +267,7 @@ class Kernel: """list[ParamInfo]: (offset, size) for each argument of this function""" @property + @cython.critical_section def occupancy(self) -> KernelOccupancy: """Get the occupancy information for launching this kernel.""" diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx index 5cb1b7f0059..bf57afbc601 100644 --- a/cuda_core/cuda/core/_module.pyx +++ b/cuda_core/cuda/core/_module.pyx @@ -4,6 +4,7 @@ from __future__ import annotations +cimport cython from libc.stddef cimport size_t from collections import namedtuple @@ -454,6 +455,7 @@ cdef class Kernel: return ker @property + @cython.critical_section def attributes(self) -> KernelAttributes: """Get the read-only attributes of this kernel.""" if self._attributes is None: @@ -501,6 +503,7 @@ cdef class Kernel: return param_info @property + @cython.critical_section def occupancy(self) -> KernelOccupancy: """Get the occupancy information for launching this kernel.""" if self._occupancy is None: @@ -742,6 +745,7 @@ cdef class ObjectCode: # TODO: do we want to unload in a finalizer? Probably not.. 
+ @cython.critical_section cdef int _lazy_load_module(self) except -1: if self._h_library: return 0 From d7503654d61fcb0e52b0c83eb2e4c9cdc77899ff Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 16 Jun 2026 09:20:13 +0200 Subject: [PATCH 2/2] Apply suggestion from @seberg --- cuda_core/cuda/core/_memory/_buffer.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx index 5f6e3fe84d8..fafc4788d8c 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -446,7 +446,6 @@ cdef class Buffer: # Memory Attribute Query Helpers # ------------------------------ -@cython.critical_section cdef inline void _init_mem_attrs(Buffer self): """Initialize memory attributes by querying the pointer.""" if not self._mem_attrs_inited: