Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions cuda_core/cuda/core/_device.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ cdef class DeviceProperties:

@property
def gpu_overlap(self) -> bool:
"""bool: Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead async_engine_count."""
"""bool: Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use :attr:`~DeviceProperties.async_engine_count` instead."""
return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP))

@property
Expand Down Expand Up @@ -662,7 +662,7 @@ cdef class DeviceProperties:

@property
def read_only_host_register_supported(self) -> bool:
"""bool: True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not."""
"""bool: True if device supports using the cuMemHostRegister flag CU_MEMHOSTREGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not."""
return bool(
self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED)
)
Expand Down Expand Up @@ -841,12 +841,12 @@ cdef class DeviceProperties:

@property
def mem_decompress_algorithm_mask(self) -> int:
"""int: The returned valued shall be interpreted as a bitmask, where the individual bits are described by the CUmemDecompressAlgorithm enum."""
"""int: The returned value shall be interpreted as a bitmask, where the individual bits are described by the CUmemDecompressAlgorithm enum."""
return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK)

@property
def mem_decompress_maximum_length(self) -> int:
"""int: The returned valued is the maximum length in bytes of a single decompress operation that is allowed."""
"""int: The returned value is the maximum length in bytes of a single decompress operation that is allowed."""
return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH)

@property
Expand Down Expand Up @@ -897,7 +897,7 @@ cdef class DeviceProperties:

@property
def host_memory_pools_supported(self) -> bool:
"""bool: Device suports HOST location with the cuMemAllocAsync and cuMemPool family of APIs."""
"""bool: Device supports HOST location with the cuMemAllocAsync and cuMemPool family of APIs."""
return bool(
self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_MEMORY_POOLS_SUPPORTED)
)
Expand Down Expand Up @@ -1033,7 +1033,7 @@ class Device:
Parameters
----------
peer : Device | int
The peer device to check accessibility to. Can be a Device object or device ID.
The peer device to check accessibility to. Can be a :obj:`~_device.Device` object or device ID.
"""
peer = Device(peer)
cdef int d1 = <int> self.device_id
Expand Down Expand Up @@ -1253,7 +1253,7 @@ class Device:

Note
----
The newly context will not be set as current.
The newly created context will not be set as current.

Parameters
----------
Expand All @@ -1269,7 +1269,7 @@ class Device:
raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")

def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:
"""Create a Stream object.
"""Create a :obj:`~_stream.Stream` object.

New stream objects can be created in two different ways:

Expand Down Expand Up @@ -1300,7 +1300,7 @@ class Device:
return Stream._init(obj=obj, options=options, device_id=self._device_id, ctx=self._context)

def create_event(self, options: EventOptions | None = None) -> Event:
"""Create an Event object without recording it to a Stream.
"""Create an :obj:`~_event.Event` object without recording it to a :obj:`~_stream.Stream`.

Note
----
Expand Down
15 changes: 14 additions & 1 deletion cuda_core/cuda/core/_event.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,20 @@ cdef class Event:

@classmethod
def from_ipc_descriptor(cls, ipc_descriptor: IPCEventDescriptor) -> Event:
"""Import an event that was exported from another process."""
"""Import an event that was exported from another process.

Parameters
----------
ipc_descriptor : :obj:`~_memory._ipc.IPCEventDescriptor`
The IPC descriptor obtained from :attr:`~Event.ipc_descriptor` in
another process.

Returns
-------
:obj:`~_event.Event`
A new event backed by the imported IPC handle.

"""
cdef cydriver.CUipcEventHandle data
memcpy(data.reserved, <const void*><const char*>(ipc_descriptor._reserved), sizeof(data.reserved))
cdef Event self = Event.__new__(cls)
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/_linker.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ class LinkerOptions:
Attributes
----------
name : str, optional
Name of the linker. If the linking succeeds, the name is passed down to the generated `ObjectCode`.
Name of the linker. If the linking succeeds, the name is passed down to the generated :class:`ObjectCode`.
arch : str, optional
Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
Expand Down
5 changes: 3 additions & 2 deletions cuda_core/cuda/core/_memory/_buffer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,9 @@ cdef class Buffer:

Parameters
----------
dst : :obj:`~_memory.Buffer`
Source buffer to copy data from
dst : :obj:`~_memory.Buffer`, optional
Destination buffer to copy data to. If not provided, a new buffer
is allocated using this buffer's memory resource.
stream : :obj:`~_stream.Stream` | :obj:`~graph.GraphBuilder`
Keyword argument specifying the stream for the
asynchronous copy
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/_memory/_device_memory_resource.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ cdef class DeviceMemoryResource(_MemPool):
Returns a tuple of sorted device IDs that currently have peer access to
allocations from this memory pool.

When setting, accepts a sequence of Device objects or device IDs.
When setting, accepts a sequence of :obj:`~_device.Device` objects or device IDs.
Setting to an empty sequence revokes all peer access.

For non-owned pools (the default or current device pool), the state
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/_module.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ cdef class KernelOccupancy:
Returns
-------
:obj:`~MaxPotentialBlockSizeOccupancyResult`
An object with `min_grid_size` amd `max_block_size` attributes encoding
An object with `min_grid_size` and `max_block_size` attributes encoding
the suggested launch configuration.

Note
Expand Down
8 changes: 4 additions & 4 deletions cuda_core/cuda/core/_program.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ class ProgramOptions:
Attributes
----------
name : str, optional
Name of the program. If the compilation succeeds, the name is passed down to the generated `ObjectCode`.
Name of the program. If the compilation succeeds, the name is passed down to the generated :class:`ObjectCode`.
arch : str, optional
Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
Expand Down Expand Up @@ -272,13 +272,13 @@ class ProgramOptions:
Disable the display of a diagnostic number for warning messages.
Default: False
diag_error : Union[int, list[int]], optional
Emit error for a specified diagnostic message number or comma separated list of numbers.
Emit error for a specified diagnostic message number or comma-separated list of numbers.
Default: None
diag_suppress : Union[int, list[int]], optional
Suppress a specified diagnostic message number or comma separated list of numbers.
Suppress a specified diagnostic message number or comma-separated list of numbers.
Default: None
diag_warn : Union[int, list[int]], optional
Emit warning for a specified diagnostic message number or comma separated lis of numbers.
Emit warning for a specified diagnostic message number or comma-separated list of numbers.
Default: None
brief_diagnostics : bool, optional
Disable or enable showing source line and column info in a diagnostic.
Expand Down
11 changes: 9 additions & 2 deletions cuda_core/cuda/core/_stream.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ cdef class Stream:
def record(self, event: Event = None, options: EventOptions = None) -> Event:
"""Record an event onto the stream.

Creates an Event object (or reuses the given one) by
Creates an :obj:`~_event.Event` object (or reuses the given one) by
recording on the stream.

Parameters
Expand Down Expand Up @@ -269,6 +269,13 @@ cdef class Stream:
work is completed. This is done by recording a new :obj:`~_event.Event`
on the stream and then waiting on it.

Parameters
----------
event_or_stream : :obj:`~_event.Event` | :obj:`~_stream.Stream`
The event or stream to wait for. Objects supporting the
``__cuda_stream__`` protocol are also accepted and treated as
streams.

"""
cdef Stream stream
cdef EventHandle h_event
Expand Down Expand Up @@ -332,7 +339,7 @@ cdef class Stream:
Note
----
Stream lifetime is not managed; the foreign object must remain
alive while this steam is active.
alive while this stream is active.

Parameters
----------
Expand Down
10 changes: 10 additions & 0 deletions cuda_core/cuda/core/system/_system.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ def get_driver_version_full(kernel_mode: bool = False) -> tuple[int, int, int]:
def get_nvml_version() -> tuple[int, ...]:
"""
The version of the NVML library.

Returns
-------
version : tuple[int, ...]
Tuple of integers representing the NVML version components.
"""
if not CUDA_BINDINGS_NVML_IS_COMPATIBLE:
raise RuntimeError("NVML library is not available")
Expand All @@ -97,6 +102,11 @@ def get_nvml_version() -> tuple[int, ...]:
def get_driver_branch() -> str:
"""
Retrieves the driver branch of the NVIDIA driver installed on the system.

Returns
-------
branch : str
The driver branch string (e.g., ``"560"`` or ``"open"``).
"""
if not CUDA_BINDINGS_NVML_IS_COMPATIBLE:
raise RuntimeError("NVML library is not available")
Expand Down
4 changes: 2 additions & 2 deletions cuda_core/docs/source/getting-started.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
.. SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
.. SPDX-License-Identifier: Apache-2.0

.. currentmodule:: cuda.core
Expand Down Expand Up @@ -68,7 +68,7 @@ Don't forget to use :meth:`Device.set_current`!
s = dev.create_stream()

Next, we compile the CUDA C++ kernel from earlier using the :class:`Program` class.
The result of the compilation is saved as a CUBIN.
The result of the compilation is saved as a CUBIN.
Note the use of the ``name_expressions`` parameter to the :meth:`Program.compile` method to specify which kernel template instantiations to compile:

.. code-block:: python
Expand Down
Loading