diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py index dc818dfd08f..7941f473ea2 100644 --- a/cuda_pathfinder/cuda/pathfinder/__init__.py +++ b/cuda_pathfinder/cuda/pathfinder/__init__.py @@ -7,10 +7,19 @@ # cuda_pathfinder/docs/source/api.rst # to keep the documentation in sync. -from cuda.pathfinder._binaries.find_nvidia_binary_utility import ( - find_nvidia_binary_utility as find_nvidia_binary_utility, -) from cuda.pathfinder._binaries.supported_nvidia_binaries import SUPPORTED_BINARIES as _SUPPORTED_BINARIES +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityCheckError as CompatibilityCheckError, +) +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityGuardRails as CompatibilityGuardRails, +) +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityInsufficientMetadataError as CompatibilityInsufficientMetadataError, +) +from cuda.pathfinder._compatibility_guard_rails import ( + DriverCtkCompatibilityError as DriverCtkCompatibilityError, +) from cuda.pathfinder._dynamic_libs.load_dl_common import ( DynamicLibNotAvailableError as DynamicLibNotAvailableError, ) @@ -19,16 +28,38 @@ DynamicLibUnknownError as DynamicLibUnknownError, ) from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL as LoadedDL -from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( SUPPORTED_LIBNAMES as SUPPORTED_NVIDIA_LIBNAMES, ) from cuda.pathfinder._headers.find_nvidia_headers import LocatedHeaderDir as LocatedHeaderDir -from cuda.pathfinder._headers.find_nvidia_headers import find_nvidia_header_directory as find_nvidia_header_directory -from cuda.pathfinder._headers.find_nvidia_headers import ( +from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK +from 
cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_bitcode_lib as find_bitcode_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_nvidia_binary_utility as find_nvidia_binary_utility, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_nvidia_header_directory as find_nvidia_header_directory, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_static_lib as find_static_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + load_nvidia_dynamic_lib as load_nvidia_dynamic_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + locate_bitcode_lib as locate_bitcode_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( locate_nvidia_header_directory as locate_nvidia_header_directory, ) -from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + locate_static_lib as locate_static_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + process_wide_compatibility_guard_rails as _process_wide_compatibility_guard_rails, +) from cuda.pathfinder._static_libs.find_bitcode_lib import ( SUPPORTED_BITCODE_LIBS as _SUPPORTED_BITCODE_LIBS, ) @@ -38,12 +69,6 @@ from cuda.pathfinder._static_libs.find_bitcode_lib import ( LocatedBitcodeLib as LocatedBitcodeLib, ) -from cuda.pathfinder._static_libs.find_bitcode_lib import ( - find_bitcode_lib as find_bitcode_lib, -) -from cuda.pathfinder._static_libs.find_bitcode_lib import ( - locate_bitcode_lib as locate_bitcode_lib, -) from cuda.pathfinder._static_libs.find_static_lib import ( SUPPORTED_STATIC_LIBS as _SUPPORTED_STATIC_LIBS, ) @@ -53,16 +78,16 @@ from cuda.pathfinder._static_libs.find_static_lib import ( StaticLibNotFoundError as StaticLibNotFoundError, ) -from 
cuda.pathfinder._static_libs.find_static_lib import ( - find_static_lib as find_static_lib, -) -from cuda.pathfinder._static_libs.find_static_lib import ( - locate_static_lib as locate_static_lib, -) from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home as get_cuda_path_or_home from cuda.pathfinder._version import __version__ # isort: skip +#: Process-wide default compatibility guard rails instance. Public APIs can +#: delegate through this singleton while the explicit ``CompatibilityGuardRails`` +#: class remains available for advanced use cases. +process_wide_compatibility_guard_rails = _process_wide_compatibility_guard_rails + + # Indirections to help Sphinx find the docstrings. #: Mapping from short CUDA Toolkit (CTK) library names to their canonical #: header basenames (used to validate a discovered include directory). diff --git a/cuda_pathfinder/cuda/pathfinder/_binaries/supported_nvidia_binaries.py b/cuda_pathfinder/cuda/pathfinder/_binaries/supported_nvidia_binaries.py index ac70378f112..e4c1129921e 100644 --- a/cuda_pathfinder/cuda/pathfinder/_binaries/supported_nvidia_binaries.py +++ b/cuda_pathfinder/cuda/pathfinder/_binaries/supported_nvidia_binaries.py @@ -31,4 +31,24 @@ "nsight-compute": (_NSIGHT_COMPUTE_BIN,), } +SUPPORTED_BINARIES_CTK_COMPANION_TAGS = { + "bin2c": ("toolchain_cuda_nvcc",), + "cuobjdump": ("toolchain_cuda_nvcc",), + "fatbinary": ("toolchain_cuda_nvcc",), + "nvcc": ("toolchain_cuda_nvcc",), + "nvdisasm": ("toolchain_cuda_nvcc",), + "nvlink": ("toolchain_cuda_nvcc",), + "nvprune": ("toolchain_cuda_nvcc",), +} + +# Nsight Systems and Nsight Compute ship in their own PyPI/Conda packages +# (`nvidia/nsight_systems`, `nvidia/nsight_compute`) and are not pinned by the +# `cuda-toolkit` distribution, so they cannot participate in CTK-coherence +# checks. They are tagged "other" so the guard rails treat them as separately +# packaged tools rather than reporting them as missing CTK metadata. 
+SUPPORTED_BINARIES_PACKAGED_WITH = { + name: ("other" if name in {"nsys", "nsight-sys", "ncu", "nsight-compute"} else "ctk") + for name in SITE_PACKAGES_BINDIRS +} + SUPPORTED_BINARIES_ALL = SUPPORTED_BINARIES = tuple(SITE_PACKAGES_BINDIRS.keys()) diff --git a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py new file mode 100644 index 00000000000..e1c4dc1becc --- /dev/null +++ b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py @@ -0,0 +1,1113 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import functools +import importlib.metadata +import os +import re +from collections.abc import Mapping +from dataclasses import dataclass +from pathlib import Path +from typing import TypeAlias, cast + +from packaging.requirements import InvalidRequirement, Requirement +from packaging.specifiers import InvalidSpecifier, SpecifierSet +from packaging.version import InvalidVersion, Version + +from cuda.pathfinder._binaries.find_nvidia_binary_utility import ( + find_nvidia_binary_utility as _find_nvidia_binary_utility, +) +from cuda.pathfinder._binaries.supported_nvidia_binaries import ( + SUPPORTED_BINARIES_CTK_COMPANION_TAGS, + SUPPORTED_BINARIES_PACKAGED_WITH, +) +from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS +from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL +from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import ( + load_nvidia_dynamic_lib as _load_nvidia_dynamic_lib, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + LocatedHeaderDir, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + locate_nvidia_header_directory as _locate_nvidia_header_directory, +) +from cuda.pathfinder._headers.header_descriptor import HEADER_DESCRIPTORS +from 
cuda.pathfinder._static_libs.find_bitcode_lib import ( + SUPPORTED_BITCODE_LIBS_CTK_COMPANION_TAGS, + LocatedBitcodeLib, +) +from cuda.pathfinder._static_libs.find_bitcode_lib import ( + locate_bitcode_lib as _locate_bitcode_lib, +) +from cuda.pathfinder._static_libs.find_static_lib import ( + SUPPORTED_STATIC_LIBS_CTK_COMPANION_TAGS, + LocatedStaticLib, +) +from cuda.pathfinder._static_libs.find_static_lib import ( + locate_static_lib as _locate_static_lib, +) +from cuda.pathfinder._utils.driver_info import ( + DriverCudaVersion, + DriverReleaseVersion, + QueryDriverCudaVersionError, + QueryDriverReleaseVersionError, + query_driver_cuda_version, + query_driver_release_version, +) +from cuda.pathfinder._utils.toolkit_info import ReadCudaHeaderVersionError, read_cuda_header_version + +ItemKind: TypeAlias = str +PackagedWith: TypeAlias = str +CtkVersionConstraintArg: TypeAlias = str | SpecifierSet | None +PairwiseItemRelationKind: TypeAlias = str +DriverCompatibilityKind: TypeAlias = str +PipelineArtifactKind: TypeAlias = str + +_CTK_VERSION_RE = re.compile(r"^(?P\d+)\.(?P\d+)") +_CTK_VERSION_CONSTRAINT_ERROR = ( + "ctk_version must be None, a non-empty PEP 440 specifier string like '>=13.2,<14', " + "or a packaging.specifiers.SpecifierSet." 
+) +_PAIRWISE_ITEM_RELATION_NONE = "none" +_PAIRWISE_ITEM_RELATION_EXACT_CTK_MATCH_REQUIRED = "exact-ctk-match-required" +_DRIVER_COMPATIBILITY_BACKWARD = "backward-compatibility" +_DRIVER_COMPATIBILITY_MINOR_VERSION = "minor-version-compatibility" +_PIPELINE_ARTIFACT_KIND_LTOIR = "ltoir" +_PIPELINE_ARTIFACT_KIND_PTX = "ptx" +_PIPELINE_ARTIFACT_KIND_ELF = "elf" +_PIPELINE_ARTIFACT_KIND_CUBIN = "cubin" +_PIPELINE_ARTIFACT_KINDS = ( + _PIPELINE_ARTIFACT_KIND_LTOIR, + _PIPELINE_ARTIFACT_KIND_PTX, + _PIPELINE_ARTIFACT_KIND_ELF, + _PIPELINE_ARTIFACT_KIND_CUBIN, +) +_MIN_DRIVER_BRANCH_FOR_MINOR_VERSION_COMPATIBILITY_BY_CTK_MAJOR = { + 11: 450, + 12: 525, + 13: 580, +} + + +@dataclass(frozen=True, slots=True) +class PairwiseItemRelation: + kind: PairwiseItemRelationKind + reason: str | None = None + + +@dataclass(frozen=True, slots=True) +class DriverCompatibilityDecision: + kind: DriverCompatibilityKind + detail: str + + +@dataclass(frozen=True, slots=True) +class DeclaredDynamicLibPipeline: + producer_libname: str + consumer_libname: str + artifact_kind: PipelineArtifactKind + + +# NOTE: Any new entry added to ``SUPPORTED_STATIC_LIBS`` (e.g. ``culibos``) +# or ``SUPPORTED_BITCODE_LIBS`` (e.g. a future ``device``-style bitcode lib) +# must be registered in the dicts below in the same change. The packaging +# classification is required by ``_resolve_static_lib_item`` and +# ``_resolve_bitcode_lib_item``; missing entries raise ``KeyError`` at runtime +# instead of producing a guarded ``CompatibilityInsufficientMetadataError``. +# Coverage is enforced by the parametrized resolver tests in +# ``tests/test_compatibility_guard_rails.py``. 
+_STATIC_LIBS_PACKAGED_WITH: dict[str, PackagedWith] = { + "cudadevrt": "ctk", +} +_BITCODE_LIBS_PACKAGED_WITH: dict[str, PackagedWith] = { + "device": "ctk", + "nccl_device": "other", + "nvshmem_device": "other", +} +_BINARY_PACKAGED_WITH: dict[str, PackagedWith] = dict(SUPPORTED_BINARIES_PACKAGED_WITH) + + +class CompatibilityCheckError(RuntimeError): + """Raised when compatibility checks reject a resolved item.""" + + +class CompatibilityInsufficientMetadataError(CompatibilityCheckError): + """Raised when v1 compatibility checks cannot reach a definitive answer.""" + + +class DriverCtkCompatibilityError(CompatibilityCheckError): + """Raised when driver-vs-CTK policy rejects a resolved item.""" + + +@dataclass(frozen=True, slots=True) +class CtkMetadata: + ctk_version: CtkVersion + ctk_root: str | None + source: str + + +@dataclass(frozen=True, slots=True) +class CtkVersion: + major: int + minor: int + + def __str__(self) -> str: + return f"{self.major}.{self.minor}" + + def as_pep440_version(self) -> Version: + return Version(str(self)) + + +@dataclass(frozen=True, slots=True) +class CtkVersionConstraint: + specifier: SpecifierSet + text: str + + def matches(self, candidate: CtkVersion) -> bool: + return bool(self.specifier.contains(candidate.as_pep440_version(), prereleases=True)) + + def __str__(self) -> str: + return self.text + + +@dataclass(frozen=True, slots=True) +class ResolvedItem: + name: str + kind: ItemKind + packaged_with: PackagedWith + abs_path: str + found_via: str | None + ctk_root: str | None + ctk_version: CtkVersion | None + ctk_version_source: str | None + dynamic_link_component: str | None + ctk_companion_tags: tuple[str, ...] 
+ + def describe(self) -> str: + found_via = "" if self.found_via is None else f" via {self.found_via}" + return f"{self.kind} {self.name!r}{found_via} at {self.abs_path!r}" + + +@dataclass(frozen=True, slots=True) +class CompatibilityResult: + status: str + message: str + error_type: type[CompatibilityCheckError] = CompatibilityCheckError + + def require_compatible(self) -> None: + if self.status == "compatible": + return + if self.status == "insufficient_metadata": + raise CompatibilityInsufficientMetadataError(self.message) + raise self.error_type(self.message) + + +def _parse_ctk_version(cuda_version: str) -> CtkVersion | None: + match = _CTK_VERSION_RE.match(cuda_version) + if match is None: + return None + return CtkVersion(major=int(match.group("major")), minor=int(match.group("minor"))) + + +def _coerce_ctk_version_constraint(raw_value: CtkVersionConstraintArg) -> CtkVersionConstraint | None: + if raw_value is None: + return None + if isinstance(raw_value, SpecifierSet): + return CtkVersionConstraint(specifier=raw_value, text=str(raw_value)) + if isinstance(raw_value, str): + stripped = raw_value.strip() + if not stripped: + raise ValueError(_CTK_VERSION_CONSTRAINT_ERROR) + try: + specifier = SpecifierSet(stripped) + except InvalidSpecifier as exc: + raise ValueError(_CTK_VERSION_CONSTRAINT_ERROR) from exc + return CtkVersionConstraint(specifier=specifier, text=stripped) + raise ValueError(_CTK_VERSION_CONSTRAINT_ERROR) + + +def _normalize_distribution_name(name: str) -> str: + return re.sub(r"[-_.]+", "-", name).lower() + + +def _distribution_name(dist: importlib.metadata.Distribution) -> str | None: + # Work around mypy's typing of Distribution.metadata as PackageMetadata: + # the runtime object behaves like a string mapping, but mypy does not + # expose Mapping.get() on PackageMetadata. 
+ metadata = cast(Mapping[str, str], dist.metadata) + return metadata.get("Name") + + +@functools.cache +def _owned_distribution_candidates(abs_path: str) -> tuple[tuple[str, str], ...]: + # Symlinks are intentionally not chased: ``os.path.realpath`` is omitted on + # both sides of the comparison, so editable installs that route through a + # wheel-cache symlink are not matched. This keeps the wheel-metadata path + # tied to the path the search actually returned, not to a different on-disk + # location that happens to share inodes. + normalized_abs_path = os.path.normpath(os.path.abspath(abs_path)) + matches: set[tuple[str, str]] = set() + for dist in importlib.metadata.distributions(): + dist_name = _distribution_name(dist) + if not dist_name: + continue + for file in dist.files or (): + candidate_abs_path = os.path.normpath(os.path.abspath(str(dist.locate_file(file)))) + if candidate_abs_path == normalized_abs_path: + matches.add((dist_name, dist.version)) + return tuple(sorted(matches)) + + +@functools.cache +def _cuda_toolkit_requirement_maps() -> tuple[tuple[str, CtkVersion, dict[str, tuple[SpecifierSet, ...]]], ...]: + results: list[tuple[str, CtkVersion, dict[str, tuple[SpecifierSet, ...]]]] = [] + for dist in importlib.metadata.distributions(): + dist_name = _distribution_name(dist) + if _normalize_distribution_name(dist_name or "") != "cuda-toolkit": + continue + ctk_version = _parse_ctk_version(dist.version) + if ctk_version is None: + continue + requirement_map: dict[str, set[str]] = {} + for requirement_text in dist.requires or (): + try: + requirement = Requirement(requirement_text) + except InvalidRequirement: + continue + specifier_text = str(requirement.specifier) + if not specifier_text: + continue + req_name = _normalize_distribution_name(requirement.name) + requirement_map.setdefault(req_name, set()).add(specifier_text) + results.append( + ( + dist.version, + ctk_version, + { + name: tuple(SpecifierSet(specifier_text) for specifier_text in 
sorted(specifier_set_texts)) + for name, specifier_set_texts in requirement_map.items() + }, + ) + ) + return tuple(results) + + +def _wheel_metadata_matches_for_abs_path(abs_path: str) -> dict[CtkVersion, str]: + matched_versions: dict[CtkVersion, str] = {} + for owner_name, owner_version in _owned_distribution_candidates(abs_path): + try: + owner_parsed_version = Version(owner_version) + except InvalidVersion: + continue + normalized_owner_name = _normalize_distribution_name(owner_name) + for toolkit_dist_version, ctk_version, requirement_map in _cuda_toolkit_requirement_maps(): + requirement_specifier_sets = requirement_map.get(normalized_owner_name, ()) + if not any( + specifier_set.contains(owner_parsed_version, prereleases=True) + for specifier_set in requirement_specifier_sets + ): + continue + matched_versions[ctk_version] = ( + f"wheel metadata via {owner_name}=={owner_version} pinned by cuda-toolkit=={toolkit_dist_version}" + ) + return matched_versions + + +def _wheel_metadata_for_abs_path(abs_path: str) -> CtkMetadata | None: + matched_versions = _wheel_metadata_matches_for_abs_path(abs_path) + if len(matched_versions) != 1: + return None + [(ctk_version, source)] = matched_versions.items() + return CtkMetadata(ctk_version=ctk_version, ctk_root=None, source=source) + + +def _normalized_ctk_root_for_cuda_header(cuda_header_path: Path) -> Path: + ctk_root = cuda_header_path.parent.parent + if ctk_root.parent.name == "targets": + return ctk_root.parent.parent + return ctk_root + + +@functools.cache +def _cuda_header_metadata_for_ctk_root_candidate(ctk_root_candidate: str) -> CtkMetadata | None: + candidate_path = Path(ctk_root_candidate) + header_paths: list[Path] = [] + + direct_header = candidate_path / "include" / "cuda.h" + if direct_header.is_file(): + header_paths.append(direct_header) + + targets_dir = candidate_path / "targets" + if targets_dir.is_dir(): + header_paths.extend(sorted(path for path in targets_dir.glob("*/include/cuda.h") if 
path.is_file())) + + matches: list[tuple[CtkVersion, Path, Path]] = [] + for cuda_header_path in header_paths: + try: + version = read_cuda_header_version(str(cuda_header_path)) + except ReadCudaHeaderVersionError: + continue + matches.append( + ( + CtkVersion(major=version.major, minor=version.minor), + _normalized_ctk_root_for_cuda_header(cuda_header_path), + cuda_header_path, + ) + ) + + if not matches: + return None + + ctk_version, ctk_root, source_path = matches[0] + if any(other_version != ctk_version for other_version, _other_root, _other_source in matches[1:]): + return None + + return CtkMetadata( + ctk_version=ctk_version, + ctk_root=str(ctk_root), + source=f"cuda.h at {source_path}", + ) + + +def _ctk_metadata_for_abs_path(abs_path: str) -> CtkMetadata | None: + current = Path(abs_path) + if current.is_file(): + current = current.parent + for candidate in (current, *current.parents): + ctk_metadata = _cuda_header_metadata_for_ctk_root_candidate(str(candidate)) + if ctk_metadata is not None: + return ctk_metadata + return _wheel_metadata_for_abs_path(abs_path) + + +def _resolve_item( + *, + name: str, + kind: ItemKind, + packaged_with: PackagedWith, + abs_path: str, + found_via: str | None, + dynamic_link_component: str | None = None, + ctk_companion_tags: tuple[str, ...] 
= (), +) -> ResolvedItem: + ctk_metadata = _ctk_metadata_for_abs_path(abs_path) + return ResolvedItem( + name=name, + kind=kind, + packaged_with=packaged_with, + abs_path=abs_path, + found_via=found_via, + ctk_root=None if ctk_metadata is None else ctk_metadata.ctk_root, + ctk_version=None if ctk_metadata is None else ctk_metadata.ctk_version, + ctk_version_source=None if ctk_metadata is None else ctk_metadata.source, + dynamic_link_component=dynamic_link_component, + ctk_companion_tags=ctk_companion_tags, + ) + + +def _resolve_dynamic_lib_item(libname: str, loaded: LoadedDL) -> ResolvedItem: + if loaded.abs_path is None: + raise CompatibilityInsufficientMetadataError( + f"Could not determine an absolute path for dynamic library {libname!r}." + ) + desc = LIB_DESCRIPTORS[libname] + return _resolve_item( + name=libname, + kind="dynamic-lib", + packaged_with=desc.packaged_with, + abs_path=loaded.abs_path, + found_via=loaded.found_via, + dynamic_link_component=desc.dynamic_link_component, + ctk_companion_tags=desc.ctk_companion_tags, + ) + + +def _resolve_header_item(libname: str, located: LocatedHeaderDir) -> ResolvedItem: + if located.abs_path is None: + raise CompatibilityInsufficientMetadataError( + f"Could not determine an absolute path for header directory {libname!r}." 
+ ) + desc = HEADER_DESCRIPTORS[libname] + metadata_abs_path = os.path.join(located.abs_path, desc.header_basename) + return _resolve_item( + name=libname, + kind="header-dir", + packaged_with=desc.packaged_with, + abs_path=metadata_abs_path, + found_via=located.found_via, + ctk_companion_tags=desc.ctk_companion_tags, + ) + + +def _resolve_static_lib_item(located: LocatedStaticLib) -> ResolvedItem: + packaged_with = _STATIC_LIBS_PACKAGED_WITH[located.name] + return _resolve_item( + name=located.name, + kind="static-lib", + packaged_with=packaged_with, + abs_path=located.abs_path, + found_via=located.found_via, + ctk_companion_tags=SUPPORTED_STATIC_LIBS_CTK_COMPANION_TAGS.get(located.name, ()), + ) + + +def _resolve_bitcode_lib_item(located: LocatedBitcodeLib) -> ResolvedItem: + packaged_with = _BITCODE_LIBS_PACKAGED_WITH[located.name] + return _resolve_item( + name=located.name, + kind="bitcode-lib", + packaged_with=packaged_with, + abs_path=located.abs_path, + found_via=located.found_via, + ctk_companion_tags=SUPPORTED_BITCODE_LIBS_CTK_COMPANION_TAGS.get(located.name, ()), + ) + + +def _resolve_binary_item(utility_name: str, abs_path: str) -> ResolvedItem: + packaged_with = _BINARY_PACKAGED_WITH[utility_name] + return _resolve_item( + name=utility_name, + kind="binary", + packaged_with=packaged_with, + abs_path=abs_path, + found_via=None, + ctk_companion_tags=SUPPORTED_BINARIES_CTK_COMPANION_TAGS.get(utility_name, ()), + ) + + +def _unsupported_packaging_message( + item: ResolvedItem, *, allow_compatibility_neutral_driver_libraries: bool = False +) -> str: + message = "v1 compatibility checks only give definitive answers for packaged_with='ctk' items" + if allow_compatibility_neutral_driver_libraries: + message += ", plus compatibility-neutral driver libraries" + return f"{message}. {item.describe()} is packaged_with={item.packaged_with!r}." 
+ + +def _missing_ctk_metadata_message(item: ResolvedItem) -> str: + base = ( + "v1 compatibility checks require either an enclosing CUDA Toolkit root " + "with cuda.h or wheel metadata that can be traced to an installed " + f"cuda-toolkit distribution. Could not determine the CTK version for {item.describe()}." + ) + matches = _wheel_metadata_matches_for_abs_path(item.abs_path) + if len(matches) > 1: + # Multiple cuda-toolkit distributions claim the same wheel-installed + # file; surface them so users can disambiguate (typically by removing + # one of the conflicting cuda-toolkit==X.Y wheels). + rendered = ", ".join(f"CTK {ctk_version} ({source})" for ctk_version, source in sorted(matches.items())) + base += f" Wheel metadata matched multiple incompatible CTK versions: {rendered}." + return base + + +def _ctk_constraint_failure_message(item: ResolvedItem, constraint: CtkVersionConstraint) -> str: + assert item.ctk_version is not None + return f"{item.describe()} resolves to CTK {item.ctk_version}, which does not satisfy ctk_version{constraint}." 
+ + +def _driver_backward_compatibility_detail(driver_cuda_version: DriverCudaVersion, item: ResolvedItem) -> str: + assert item.ctk_version is not None + return ( + f"the driver satisfies backward compatibility because cuDriverGetVersion() reports " + f"CUDA {driver_cuda_version.major}.{driver_cuda_version.minor}, which is not older than CTK {item.ctk_version}" + ) + + +def _driver_minor_version_compatibility_detail( + driver_cuda_version: DriverCudaVersion, + driver_release_version: DriverReleaseVersion, + item: ResolvedItem, + *, + required_branch: int, +) -> str: + assert item.ctk_version is not None + return ( + "the driver satisfies NVIDIA's same-major minor-version compatibility because " + f"cuDriverGetVersion() reports older CUDA {driver_cuda_version.major}.{driver_cuda_version.minor}, " + f"but display-driver release {driver_release_version.text} meets the published CUDA " + f"{item.ctk_version.major}.x minimum branch >= {required_branch}" + ) + + +def _ctk_pair_mismatch_message( + item1: ResolvedItem, + item2: ResolvedItem, + relation: PairwiseItemRelation, +) -> str: + assert item1.ctk_version is not None + assert item2.ctk_version is not None + assert relation.reason is not None + requirement_reason = relation.reason[:1].upper() + relation.reason[1:] + return ( + f"{item1.describe()} resolves to CTK {item1.ctk_version}, while " + f"{item2.describe()} resolves to CTK {item2.ctk_version}. " + f"{requirement_reason}, so v1 requires an exact CTK major.minor match." + ) + + +def _driver_major_mismatch_message(driver_cuda_version: DriverCudaVersion, item: ResolvedItem) -> str: + assert item.ctk_version is not None + return ( + f"Driver version {driver_cuda_version.encoded} only supports CUDA major version {driver_cuda_version.major}, " + f"but {item.describe()} requires CTK {item.ctk_version}. " + "v1 requires driver_major >= ctk_major." 
+ ) + + +def _driver_cuda_version_too_old_message(driver_cuda_version: DriverCudaVersion, item: ResolvedItem) -> str: + assert item.ctk_version is not None + return ( + f"cuDriverGetVersion() reports CUDA {driver_cuda_version.major}.{driver_cuda_version.minor}, " + f"but {item.describe()} requires CTK {item.ctk_version}. " + "NVIDIA's published minor-version compatibility starts with CUDA 11, so v1 requires " + "the driver CUDA version to be at least the CTK version for older CTK majors." + ) + + +def _missing_driver_release_version_message(driver_cuda_version: DriverCudaVersion, item: ResolvedItem) -> str: + assert item.ctk_version is not None + return ( + f"cuDriverGetVersion() reports older CUDA {driver_cuda_version.major}.{driver_cuda_version.minor} for " + f"{item.describe()}, which requires CTK {item.ctk_version}. Determining whether NVIDIA's same-major " + "minor-version compatibility applies requires the display-driver release version (for example " + "'535.54.03' or branch '535')." + ) + + +def _driver_release_branch_too_old_message( + driver_cuda_version: DriverCudaVersion, + driver_release_version: DriverReleaseVersion, + item: ResolvedItem, + *, + required_branch: int, +) -> str: + assert item.ctk_version is not None + return ( + f"cuDriverGetVersion() reports older CUDA {driver_cuda_version.major}.{driver_cuda_version.minor}, " + f"and display-driver release {driver_release_version.text} (branch {driver_release_version.branch}) " + f"is below NVIDIA's published CUDA {item.ctk_version.major}.x minimum branch >= {required_branch} " + f"for {item.describe()}." 
+ ) + + +def _declared_dynamic_lib_pipeline_description(pipeline: DeclaredDynamicLibPipeline) -> str: + return ( + f"declared dynamic-lib pipeline {pipeline.producer_libname!r} -> {pipeline.consumer_libname!r} " + f"for artifact kind {pipeline.artifact_kind!r}" + ) + + +def _nvjitlink_ltoir_major_mismatch_message( + producer_item: ResolvedItem, + consumer_item: ResolvedItem, + pipeline: DeclaredDynamicLibPipeline, +) -> str: + assert producer_item.ctk_version is not None + assert consumer_item.ctk_version is not None + return ( + f"{_declared_dynamic_lib_pipeline_description(pipeline)} uses producer {producer_item.describe()} at " + f"CTK {producer_item.ctk_version} and consumer {consumer_item.describe()} at CTK {consumer_item.ctk_version}. " + "For LTOIR inputs, NVIDIA documents nvJitLink compatibility only within a major release family, " + "so v1 requires the producer and consumer CTK majors to match." + ) + + +def _nvjitlink_ltoir_consumer_too_old_message( + producer_item: ResolvedItem, + consumer_item: ResolvedItem, + pipeline: DeclaredDynamicLibPipeline, +) -> str: + assert producer_item.ctk_version is not None + assert consumer_item.ctk_version is not None + return ( + f"{_declared_dynamic_lib_pipeline_description(pipeline)} uses producer {producer_item.describe()} at " + f"CTK {producer_item.ctk_version} and consumer {consumer_item.describe()} at CTK {consumer_item.ctk_version}. " + "For LTOIR inputs, NVIDIA documents that nvJitLink must be >= the producer version, " + "so v1 rejects an older nvJitLink consumer." 
    )  # NOTE(review): closes a message-builder whose body starts above this chunk


def _nvvm_pipeline_conservative_message(
    producer_item: ResolvedItem,
    consumer_item: ResolvedItem,
    pipeline: DeclaredDynamicLibPipeline,
) -> str:
    """Explain why an explicit nvvm pipeline demands an exact CTK major.minor match."""
    # Callers validate CTK metadata before building any policy message.
    assert producer_item.ctk_version is not None
    assert consumer_item.ctk_version is not None
    return (
        f"{_declared_dynamic_lib_pipeline_description(pipeline)} involves {producer_item.describe()} at "
        f"CTK {producer_item.ctk_version} and {consumer_item.describe()} at CTK {consumer_item.ctk_version}. "
        "v1 remains conservative for explicit nvvm pipeline contexts until NVVM IR version and dialect are modeled, "
        "so it requires an exact CTK major.minor match."
    )


def _compatible_pair_message(
    driver_decision: DriverCompatibilityDecision,
    item1: ResolvedItem,
    item2: ResolvedItem,
    relation: PairwiseItemRelation,
) -> str:
    """Build the human-readable justification attached to a "compatible" result."""
    assert item1.ctk_version is not None
    assert item2.ctk_version is not None
    if relation.kind == _PAIRWISE_ITEM_RELATION_NONE:
        # No authored relation between the two items: explain the pair clause,
        # then append the (independent) driver-side rationale.
        if item1.ctk_version == item2.ctk_version:
            shared_clause = (
                f"{item1.describe()} and {item2.describe()} both resolve to CTK {item1.ctk_version}, "
                "and v1 does not require any direct relation between them"
            )
        else:
            shared_clause = (
                f"{item1.describe()} resolves to CTK {item1.ctk_version}, "
                f"{item2.describe()} resolves to CTK {item2.ctk_version}, "
                "and v1 does not require exact CTK lockstep for this pair"
            )
        return f"{shared_clause}. Separately, {driver_decision.detail}."
    # A relation was classified, so a reason string must accompany it.
    assert relation.reason is not None
    # Capitalize only the first character of the authored reason sentence.
    return (
        f"{item1.describe()} and {item2.describe()} both resolve to CTK {item1.ctk_version}. "
        f"{relation.reason[:1].upper() + relation.reason[1:]}. Separately, {driver_decision.detail}."
    )


def _supported_packaging_result(item: ResolvedItem) -> CompatibilityResult | None:
    """Return an insufficient-metadata result unless the item is CTK-packaged."""
    if item.packaged_with == "ctk":
        return None
    return CompatibilityResult(status="insufficient_metadata", message=_unsupported_packaging_message(item))


def _ctk_metadata_result(item: ResolvedItem) -> CompatibilityResult | None:
    """Return an insufficient-metadata result when CTK version/source is missing."""
    if item.ctk_version is not None and item.ctk_version_source is not None:
        return None
    return CompatibilityResult(status="insufficient_metadata", message=_missing_ctk_metadata_message(item))


def _shared_ctk_companion_tags(item1: ResolvedItem, item2: ResolvedItem) -> tuple[str, ...]:
    """Return the sorted intersection of the two items' authored companion tags."""
    return tuple(sorted(set(item1.ctk_companion_tags).intersection(item2.ctk_companion_tags)))


def _classify_pairwise_item_relation(item1: ResolvedItem, item2: ResolvedItem) -> PairwiseItemRelation:
    """Decide whether a pair of items must be in CTK lockstep.

    Precedence: driver-packaged items never create a relation; a shared
    dynamic-link component forces exact CTK matching; otherwise shared
    companion tags force exact CTK matching; else no relation.
    """
    if item1.packaged_with == "driver" or item2.packaged_with == "driver":
        return PairwiseItemRelation(_PAIRWISE_ITEM_RELATION_NONE)
    if item1.dynamic_link_component is not None and item1.dynamic_link_component == item2.dynamic_link_component:
        return PairwiseItemRelation(
            _PAIRWISE_ITEM_RELATION_EXACT_CTK_MATCH_REQUIRED,
            reason=f"they are in the same authored dynamic-link component {item1.dynamic_link_component!r}",
        )
    shared_companion_tags = _shared_ctk_companion_tags(item1, item2)
    if shared_companion_tags:
        # Singular vs. plural phrasing for the authored reason string.
        if len(shared_companion_tags) == 1:
            tag_description = repr(shared_companion_tags[0])
            reason = f"they share the authored companion tag {tag_description}"
        else:
            tags_description = ", ".join(repr(tag) for tag in shared_companion_tags)
            reason = f"they share the authored companion tags {tags_description}"
        return PairwiseItemRelation(_PAIRWISE_ITEM_RELATION_EXACT_CTK_MATCH_REQUIRED, reason=reason)
    return PairwiseItemRelation(_PAIRWISE_ITEM_RELATION_NONE)


def _ctk_coherence_result(
    item1: ResolvedItem,
    item2: ResolvedItem,
    relation: PairwiseItemRelation,
) -> CompatibilityResult | None:
    """Return an incompatible result when a lockstep pair disagrees on CTK version."""
    assert item1.ctk_version is not None
    assert item2.ctk_version is not None
    if item1.ctk_version == item2.ctk_version:
        return None
    return CompatibilityResult(status="incompatible", message=_ctk_pair_mismatch_message(item1, item2, relation))


def _pairwise_policy_result(
    item1: ResolvedItem,
    item2: ResolvedItem,
    relation: PairwiseItemRelation | None = None,
) -> CompatibilityResult | None:
    """Apply the pairwise policy; classify the relation first when not supplied."""
    if relation is None:
        relation = _classify_pairwise_item_relation(item1, item2)
    if relation.kind == _PAIRWISE_ITEM_RELATION_NONE:
        return None
    if relation.kind == _PAIRWISE_ITEM_RELATION_EXACT_CTK_MATCH_REQUIRED:
        return _ctk_coherence_result(item1, item2, relation)
    # Defensive: the relation kinds above are the only ones v1 defines.
    raise AssertionError(f"Unhandled pairwise item relation: {relation.kind!r}")


def _declared_dynamic_lib_pipeline_result(
    producer_item: ResolvedItem,
    consumer_item: ResolvedItem,
    pipeline: DeclaredDynamicLibPipeline,
) -> CompatibilityResult | None:
    """Check a declared producer/consumer pipeline; None means no objection.

    nvvm on either side triggers the conservative exact-match rule; the
    nvrtc -> nvJitLink pair applies LTOIR-specific version constraints.
    """
    assert producer_item.ctk_version is not None
    assert consumer_item.ctk_version is not None
    if "nvvm" in (producer_item.name, consumer_item.name):
        if producer_item.ctk_version == consumer_item.ctk_version:
            return None
        return CompatibilityResult(
            status="incompatible",
            message=_nvvm_pipeline_conservative_message(producer_item, consumer_item, pipeline),
        )
    if producer_item.name == "nvrtc" and consumer_item.name == "nvJitLink":
        if pipeline.artifact_kind in (
            _PIPELINE_ARTIFACT_KIND_PTX,
            _PIPELINE_ARTIFACT_KIND_ELF,
            _PIPELINE_ARTIFACT_KIND_CUBIN,
        ):
            # NVIDIA documents broader compatibility for PTX/ELF/CUBIN inputs than for LTOIR.
            return None
        assert pipeline.artifact_kind == _PIPELINE_ARTIFACT_KIND_LTOIR
        # LTOIR: producer and consumer must share a CTK major version ...
        if producer_item.ctk_version.major != consumer_item.ctk_version.major:
            return CompatibilityResult(
                status="incompatible",
                message=_nvjitlink_ltoir_major_mismatch_message(producer_item, consumer_item, pipeline),
            )
        # ... and the consumer must be at least as new as the producer (major, minor).
        if (
            consumer_item.ctk_version.major,
            consumer_item.ctk_version.minor,
        ) < (
            producer_item.ctk_version.major,
            producer_item.ctk_version.minor,
        ):
            return CompatibilityResult(
                status="incompatible",
                message=_nvjitlink_ltoir_consumer_too_old_message(producer_item, consumer_item, pipeline),
            )
    # Pipelines not covered by the rules above raise no objection in v1.
    return None


def _driver_cuda_version_supports_ctk_by_backward_compatibility(
    driver_cuda_version: DriverCudaVersion,
    ctk_version: CtkVersion,
) -> bool:
    """True when the driver's CUDA version is >= the CTK version (backward compat)."""
    return (driver_cuda_version.major, driver_cuda_version.minor) >= (ctk_version.major, ctk_version.minor)


def _driver_compatibility_outcome(
    driver_cuda_version: DriverCudaVersion,
    item: ResolvedItem,
    *,
    driver_release_version: DriverReleaseVersion | None = None,
) -> DriverCompatibilityDecision | CompatibilityResult:
    """Decide driver<->CTK compatibility for one item.

    Returns a DriverCompatibilityDecision when compatible (backward or
    minor-version compatibility) and a CompatibilityResult otherwise.
    Minor-version compatibility additionally needs the display-driver
    release branch; when it is unavailable the outcome is
    "insufficient_metadata" rather than a hard rejection.
    """
    assert item.ctk_version is not None
    # Fast path: driver CUDA version >= CTK version is always acceptable.
    if _driver_cuda_version_supports_ctk_by_backward_compatibility(driver_cuda_version, item.ctk_version):
        return DriverCompatibilityDecision(
            kind=_DRIVER_COMPATIBILITY_BACKWARD,
            detail=_driver_backward_compatibility_detail(driver_cuda_version, item),
        )
    # Minor-version compatibility never crosses a CUDA major version.
    if driver_cuda_version.major != item.ctk_version.major:
        return CompatibilityResult(
            status="incompatible",
            message=_driver_major_mismatch_message(driver_cuda_version, item),
            error_type=DriverCtkCompatibilityError,
        )
    required_branch = _MIN_DRIVER_BRANCH_FOR_MINOR_VERSION_COMPATIBILITY_BY_CTK_MAJOR.get(item.ctk_version.major)
    if required_branch is None:
        # No known minimum driver branch for this CTK major: cannot accept.
        return CompatibilityResult(
            status="incompatible",
            message=_driver_cuda_version_too_old_message(driver_cuda_version, item),
            error_type=DriverCtkCompatibilityError,
        )
    if driver_release_version is None:
        return CompatibilityResult(
            status="insufficient_metadata",
            message=_missing_driver_release_version_message(driver_cuda_version, item),
        )
    if driver_release_version.branch >= required_branch:
        return DriverCompatibilityDecision(
            kind=_DRIVER_COMPATIBILITY_MINOR_VERSION,
            detail=_driver_minor_version_compatibility_detail(
                driver_cuda_version,
                driver_release_version,
                item,
                required_branch=required_branch,
            ),
        )
    return CompatibilityResult(
        status="incompatible",
        message=_driver_release_branch_too_old_message(
            driver_cuda_version,
            driver_release_version,
            item,
            required_branch=required_branch,
        ),
        error_type=DriverCtkCompatibilityError,
    )
def compatibility_check(
    driver_cuda_version: DriverCudaVersion,
    item1: ResolvedItem,
    item2: ResolvedItem,
    *,
    driver_release_version: DriverReleaseVersion | None = None,
) -> CompatibilityResult:
    """Run the stateless v1 compatibility policy for a pair of resolved items.

    Check order: per-item packaging and CTK metadata, then the pairwise CTK
    coherence policy, then driver compatibility (checked against item1; both
    items share a CTK version whenever the pairwise policy required it).
    """
    for item in (item1, item2):
        result = _supported_packaging_result(item)
        if result is not None:
            return result
        result = _ctk_metadata_result(item)
        if result is not None:
            return result

    relation = _classify_pairwise_item_relation(item1, item2)
    result = _pairwise_policy_result(item1, item2, relation)
    if result is not None:
        return result

    driver_outcome = _driver_compatibility_outcome(
        driver_cuda_version,
        item1,
        driver_release_version=driver_release_version,
    )
    if isinstance(driver_outcome, CompatibilityResult):
        return driver_outcome

    return CompatibilityResult(
        status="compatible",
        message=_compatible_pair_message(driver_outcome, item1, item2, relation),
    )


class CompatibilityGuardRails:
    """Resolve CUDA artifacts while enforcing minimal v1 compatibility guard rails."""

    def __init__(
        self,
        *,
        ctk_version: CtkVersionConstraintArg = None,
        driver_cuda_version: DriverCudaVersion | None = None,
        driver_release_version: DriverReleaseVersion | None = None,
    ) -> None:
        """Configure optional CTK constraint and driver-version overrides.

        Driver versions left as None are queried lazily on first use; the
        ``_configured_*`` copies preserve explicit overrides across
        ``_reset_state``.
        """
        self._ctk_version_constraint = _coerce_ctk_version_constraint(ctk_version)
        self._configured_driver_cuda_version = driver_cuda_version
        self._driver_cuda_version = driver_cuda_version
        self._configured_driver_release_version = driver_release_version
        self._driver_release_version = driver_release_version
        # Items accepted so far; later resolutions are checked against these.
        self._resolved_items: list[ResolvedItem] = []
        self._declared_dynamic_lib_pipelines: set[DeclaredDynamicLibPipeline] = set()
        self._checked_dynamic_lib_pipelines: set[DeclaredDynamicLibPipeline] = set()

    def _get_driver_cuda_version(self) -> DriverCudaVersion:
        """Return the driver CUDA version, querying and caching it on first use."""
        if self._driver_cuda_version is None:
            try:
                self._driver_cuda_version = query_driver_cuda_version()
            except QueryDriverCudaVersionError as exc:
                raise CompatibilityCheckError(
                    "Failed to query the CUDA driver version needed for compatibility checks."
                ) from exc
        return self._driver_cuda_version

    def _get_driver_release_version(self) -> DriverReleaseVersion:
        """Return the display-driver release version, querying and caching it on first use."""
        if self._driver_release_version is None:
            try:
                self._driver_release_version = query_driver_release_version()
            except QueryDriverReleaseVersionError as exc:
                raise CompatibilityInsufficientMetadataError(
                    "Failed to query the display-driver release version needed for compatibility checks."
                ) from exc
        return self._driver_release_version

    def _enforce_supported_packaging(self, item: ResolvedItem) -> None:
        """Raise unless the item is CTK-packaged (driver items are handled upstream)."""
        if item.packaged_with == "ctk":
            return
        raise CompatibilityInsufficientMetadataError(
            _unsupported_packaging_message(item, allow_compatibility_neutral_driver_libraries=True)
        )

    def _enforce_ctk_metadata(self, item: ResolvedItem) -> None:
        """Raise when the item lacks the CTK metadata the policy needs."""
        result = _ctk_metadata_result(item)
        if result is None:
            return
        result.require_compatible()

    def _enforce_constraints(self, item: ResolvedItem) -> None:
        """Raise when the item's CTK version violates the constructor constraint."""
        assert item.ctk_version is not None
        if self._ctk_version_constraint is not None and not self._ctk_version_constraint.matches(item.ctk_version):
            raise CompatibilityCheckError(_ctk_constraint_failure_message(item, self._ctk_version_constraint))

    def _enforce_driver_compatibility(self, item: ResolvedItem) -> None:
        """Raise when the installed driver cannot support the item's CTK version."""
        driver_cuda_version = self._get_driver_cuda_version()
        assert item.ctk_version is not None
        # Only query the (slower) display-driver release version when backward
        # compatibility alone cannot decide the outcome.
        driver_release_version = (
            None
            if _driver_cuda_version_supports_ctk_by_backward_compatibility(driver_cuda_version, item.ctk_version)
            else self._get_driver_release_version()
        )
        outcome = _driver_compatibility_outcome(
            driver_cuda_version,
            item,
            driver_release_version=driver_release_version,
        )
        if isinstance(outcome, CompatibilityResult):
            outcome.require_compatible()

    def _enforce_pairwise_compatibility(self, prior_item: ResolvedItem, item: ResolvedItem) -> None:
        """Raise when the new item violates the pairwise policy against a prior item."""
        result = _pairwise_policy_result(prior_item, item)
        if result is not None:
            result.require_compatible()

    def _remembered_item(self, *, kind: ItemKind, name: str) -> ResolvedItem | None:
        """Return the most recently remembered item matching kind and name, if any."""
        for item in reversed(self._resolved_items):
            if item.kind == kind and item.name == name:
                return item
        return None

    def _enforce_declared_dynamic_lib_pipeline_if_ready(self, pipeline: DeclaredDynamicLibPipeline) -> None:
        """Check a declared pipeline once both its endpoints have been resolved."""
        if pipeline in self._checked_dynamic_lib_pipelines:
            return
        producer_item = self._remembered_item(kind="dynamic-lib", name=pipeline.producer_libname)
        if producer_item is None:
            return
        consumer_item = self._remembered_item(kind="dynamic-lib", name=pipeline.consumer_libname)
        if consumer_item is None:
            return
        result = _declared_dynamic_lib_pipeline_result(producer_item, consumer_item, pipeline)
        if result is not None:
            result.require_compatible()
        # Mark checked only after a passing (non-raising) evaluation.
        self._checked_dynamic_lib_pipelines.add(pipeline)

    def _enforce_declared_dynamic_lib_pipelines_for_item(self, item: ResolvedItem) -> None:
        """Re-evaluate any declared pipeline that references this dynamic library."""
        if item.kind != "dynamic-lib":
            return
        for pipeline in self._declared_dynamic_lib_pipelines:
            if item.name not in (pipeline.producer_libname, pipeline.consumer_libname):
                continue
            self._enforce_declared_dynamic_lib_pipeline_if_ready(pipeline)

    def _remember(self, item: ResolvedItem) -> None:
        """Record a newly accepted item and trigger pipeline checks it may complete."""
        if item not in self._resolved_items:
            self._resolved_items.append(item)
            # NOTE(review): pipeline enforcement runs on first sight of the item;
            # pipelines declared later re-check via _declare_dynamic_lib_pipeline.
            self._enforce_declared_dynamic_lib_pipelines_for_item(item)

    def _declare_dynamic_lib_pipeline(
        self,
        *,
        producer_libname: str,
        consumer_libname: str,
        artifact_kind: PipelineArtifactKind,
    ) -> None:
        """Register a producer/consumer pipeline so v1 can enforce its policy.

        Intentionally single-underscored: the pipeline API stays private in v1
        because the artifact taxonomy and policy matrix are expected to evolve
        before they are promoted to a public surface. Internal callers (e.g.
        ``cuda_bindings``' nvJitLink/nvrtc pairings) reach into this method
        directly via the ``CompatibilityGuardRails`` instance.
        """
        if producer_libname not in LIB_DESCRIPTORS:
            raise ValueError(f"Unknown dynamic library producer: {producer_libname!r}")
        if consumer_libname not in LIB_DESCRIPTORS:
            raise ValueError(f"Unknown dynamic library consumer: {consumer_libname!r}")
        if artifact_kind not in _PIPELINE_ARTIFACT_KINDS:
            allowed_values = ", ".join(repr(kind) for kind in _PIPELINE_ARTIFACT_KINDS)
            raise ValueError(f"Invalid pipeline artifact kind {artifact_kind!r}. Allowed values: {allowed_values}.")
        pipeline = DeclaredDynamicLibPipeline(
            producer_libname=producer_libname,
            consumer_libname=consumer_libname,
            artifact_kind=artifact_kind,
        )
        self._declared_dynamic_lib_pipelines.add(pipeline)
        # Both endpoints may already be resolved; check immediately if so.
        self._enforce_declared_dynamic_lib_pipeline_if_ready(pipeline)

    def _reset_state(self) -> None:
        """Clear remembered items and pipelines while preserving constructor overrides.

        Called both from tests and from the public ``cache_clear`` helpers in
        ``_process_wide_compatibility_guard_rails`` so a fresh search cycle does
        not see leftover compatibility state. Driver versions that the caller
        explicitly passed to ``__init__`` are intentionally re-applied; only the
        lazily-queried values are dropped.
        """
        self._driver_cuda_version = self._configured_driver_cuda_version
        self._driver_release_version = self._configured_driver_release_version
        self._resolved_items.clear()
        self._declared_dynamic_lib_pipelines.clear()
        self._checked_dynamic_lib_pipelines.clear()

    def _register_and_check(self, item: ResolvedItem) -> None:
        """Run the full enforcement chain on a newly resolved item, then remember it."""
        # Driver libraries come from the installed display driver rather than a
        # CUDA Toolkit line, so they do not need CTK metadata and must not
        # create CTK coherence relations by themselves.
        if item.packaged_with == "driver":
            self._remember(item)
            return
        self._enforce_supported_packaging(item)
        self._enforce_ctk_metadata(item)
        self._enforce_constraints(item)
        for prior_item in self._resolved_items:
            self._enforce_pairwise_compatibility(prior_item, item)
        self._enforce_driver_compatibility(item)
        self._remember(item)

    def load_nvidia_dynamic_lib(self, libname: str) -> LoadedDL:
        """Load a CUDA dynamic library and reject v1-incompatible resolutions.

        ``_load_nvidia_dynamic_lib`` is ``functools.cache``d, so the underlying
        OS-level load (``dlopen`` / ``LoadLibraryW``) has already happened by
        the time we raise. Subsequent calls for the same library will short-
        circuit and never re-trigger the loader, even after this rejection.
        """
        loaded = _load_nvidia_dynamic_lib(libname)
        try:
            self._register_and_check(_resolve_dynamic_lib_item(libname, loaded))
        except CompatibilityCheckError as exc:
            # Surface the irreversibility so callers don't assume the rejection
            # also unwound the underlying OS load. Mutate the same exception
            # instance in place so subclass typing (e.g.
            # DriverCtkCompatibilityError) and the original ``__cause__`` are
            # preserved.
            augmented = (
                f"{exc} Note: the underlying dynamic-library load already happened, "
                "and the resulting OS handle remains live for the rest of this process."
            )
            exc.args = (augmented, *exc.args[1:])
            raise
        return loaded

    def locate_nvidia_header_directory(self, libname: str) -> LocatedHeaderDir | None:
        """Locate a CUDA header directory and reject v1-incompatible resolutions."""
        located = _locate_nvidia_header_directory(libname)
        if located is None:
            return None
        self._register_and_check(_resolve_header_item(libname, located))
        return located

    def find_nvidia_header_directory(self, libname: str) -> str | None:
        """Locate a CUDA header directory and return only the path string."""
        located = self.locate_nvidia_header_directory(libname)
        return None if located is None else located.abs_path

    def locate_static_lib(self, name: str) -> LocatedStaticLib:
        """Locate a CUDA static library and reject v1-incompatible resolutions."""
        located = _locate_static_lib(name)
        self._register_and_check(_resolve_static_lib_item(located))
        return located

    def find_static_lib(self, name: str) -> str:
        """Locate a CUDA static library and return only the path string."""
        abs_path = self.locate_static_lib(name).abs_path
        assert isinstance(abs_path, str)
        return abs_path

    def locate_bitcode_lib(self, name: str) -> LocatedBitcodeLib:
        """Locate a CUDA bitcode library and reject v1-incompatible resolutions."""
        located = _locate_bitcode_lib(name)
        self._register_and_check(_resolve_bitcode_lib_item(located))
        return located

    def find_bitcode_lib(self, name: str) -> str:
        """Locate a CUDA bitcode library and return only the path string."""
        abs_path = self.locate_bitcode_lib(name).abs_path
        assert isinstance(abs_path, str)
        return abs_path

    def find_nvidia_binary_utility(self, utility_name: str) -> str | None:
        """Locate a CUDA binary utility and reject v1-incompatible resolutions."""
        abs_path = _find_nvidia_binary_utility(utility_name)
        if abs_path is None:
            return None
        self._register_and_check(_resolve_binary_item(utility_name, abs_path))
        assert isinstance(abs_path, str)
        return abs_path
resolutions.""" + located = _locate_bitcode_lib(name) + self._register_and_check(_resolve_bitcode_lib_item(located)) + return located + + def find_bitcode_lib(self, name: str) -> str: + """Locate a CUDA bitcode library and return only the path string.""" + abs_path = self.locate_bitcode_lib(name).abs_path + assert isinstance(abs_path, str) + return abs_path + + def find_nvidia_binary_utility(self, utility_name: str) -> str | None: + """Locate a CUDA binary utility and reject v1-incompatible resolutions.""" + abs_path = _find_nvidia_binary_utility(utility_name) + if abs_path is None: + return None + self._register_and_check(_resolve_binary_item(utility_name, abs_path)) + assert isinstance(abs_path, str) + return abs_path diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py index e334e04ddf2..cbaa076197b 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py @@ -20,6 +20,8 @@ class DescriptorSpec: site_packages_linux: tuple[str, ...] = () site_packages_windows: tuple[str, ...] = () dependencies: tuple[str, ...] = () + dynamic_link_component: str | None = None + ctk_companion_tags: tuple[str, ...] = () anchor_rel_dirs_linux: tuple[str, ...] = ("lib64", "lib") anchor_rel_dirs_windows: tuple[str, ...] = ("bin/x64", "bin") ctk_root_canary_anchor_libnames: tuple[str, ...] 
= () @@ -38,6 +40,8 @@ class DescriptorSpec: windows_dlls=("cudart64_12.dll", "cudart64_13.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/cuda_runtime/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cuda_runtime/bin"), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_cudart",), ), DescriptorSpec( name="nvfatbin", @@ -46,6 +50,7 @@ class DescriptorSpec: windows_dlls=("nvfatbin_120_0.dll", "nvfatbin_130_0.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/nvfatbin/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/nvfatbin/bin"), + ctk_companion_tags=("api_nvfatbin", "toolchain_cuda_nvcc"), ), DescriptorSpec( name="nvJitLink", @@ -54,6 +59,8 @@ class DescriptorSpec: windows_dlls=("nvJitLink_120_0.dll", "nvJitLink_130_0.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/nvjitlink/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/nvjitlink/bin"), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_nvjitlink",), ), DescriptorSpec( name="nvrtc", @@ -62,6 +69,8 @@ class DescriptorSpec: windows_dlls=("nvrtc64_120_0.dll", "nvrtc64_130_0.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/cuda_nvrtc/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvrtc/bin"), + dynamic_link_component="nvrtc_mathdx", + ctk_companion_tags=("api_nvrtc",), requires_add_dll_directory=True, ), DescriptorSpec( @@ -71,6 +80,7 @@ class DescriptorSpec: windows_dlls=("nvvm64.dll", "nvvm64_40_0.dll", "nvvm70.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/cuda_nvcc/nvvm/lib64"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvcc/nvvm/bin"), + ctk_companion_tags=("api_nvvm", "toolchain_cuda_nvcc"), anchor_rel_dirs_linux=("nvvm/lib64",), anchor_rel_dirs_windows=("nvvm/bin/*", "nvvm/bin"), ctk_root_canary_anchor_libnames=("cudart",), @@ -83,6 +93,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/cublas/lib"), 
site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cublas/bin"), dependencies=("cublasLt",), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_cublas",), ), DescriptorSpec( name="cublasLt", @@ -91,6 +103,8 @@ class DescriptorSpec: windows_dlls=("cublasLt64_12.dll", "cublasLt64_13.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/cublas/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cublas/bin"), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_cublaslt",), ), DescriptorSpec( name="cufft", @@ -99,6 +113,8 @@ class DescriptorSpec: windows_dlls=("cufft64_11.dll", "cufft64_12.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/cufft/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cufft/bin"), + dynamic_link_component="cufft_nvshmem", + ctk_companion_tags=("api_cufft",), requires_add_dll_directory=True, ), DescriptorSpec( @@ -109,6 +125,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/cufft/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cufft/bin"), dependencies=("cufft",), + dynamic_link_component="cufft_nvshmem", + ctk_companion_tags=("api_cufft",), ), DescriptorSpec( name="curand", @@ -117,6 +135,7 @@ class DescriptorSpec: windows_dlls=("curand64_10.dll",), site_packages_linux=("nvidia/cu13/lib", "nvidia/curand/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/curand/bin"), + ctk_companion_tags=("api_curand",), ), DescriptorSpec( name="cusolver", @@ -126,6 +145,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/cusolver/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cusolver/bin"), dependencies=("nvJitLink", "cusparse", "cublasLt", "cublas"), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_cusolver",), ), DescriptorSpec( name="cusolverMg", @@ -135,6 +156,7 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", 
"nvidia/cusolver/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cusolver/bin"), dependencies=("nvJitLink", "cublasLt", "cublas"), + dynamic_link_component="cuda_blas_solver_runtime", ), DescriptorSpec( name="cusparse", @@ -144,6 +166,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/cusparse/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cusparse/bin"), dependencies=("nvJitLink",), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_cusparse",), ), DescriptorSpec( name="nppc", @@ -152,6 +176,8 @@ class DescriptorSpec: windows_dlls=("nppc64_12.dll", "nppc64_13.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppial", @@ -161,6 +187,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppicc", @@ -170,6 +198,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppidei", @@ -179,6 +209,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppif", @@ -188,6 +220,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + 
ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppig", @@ -197,6 +231,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppim", @@ -206,6 +242,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppist", @@ -215,6 +253,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppisu", @@ -224,6 +264,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nppitc", @@ -233,6 +275,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="npps", @@ -242,6 +286,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/npp/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), dependencies=("nppc",), + dynamic_link_component="npp", + ctk_companion_tags=("api_npp",), ), DescriptorSpec( name="nvblas", @@ -251,6 +297,7 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/cublas/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", 
"nvidia/cublas/bin"), dependencies=("cublas", "cublasLt"), + dynamic_link_component="cuda_blas_solver_runtime", ), DescriptorSpec( name="nvjpeg", @@ -259,12 +306,15 @@ class DescriptorSpec: windows_dlls=("nvjpeg64_12.dll", "nvjpeg64_13.dll"), site_packages_linux=("nvidia/cu13/lib", "nvidia/nvjpeg/lib"), site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/nvjpeg/bin"), + ctk_companion_tags=("api_nvjpeg",), ), DescriptorSpec( name="cufile", packaged_with="ctk", linux_sonames=("libcufile.so.0",), site_packages_linux=("nvidia/cu13/lib", "nvidia/cufile/lib"), + dynamic_link_component="cufile", + ctk_companion_tags=("api_cufile",), ), DescriptorSpec( name="cupti", @@ -296,6 +346,7 @@ class DescriptorSpec: packaged_with="ctk", linux_sonames=("libcudla.so.1",), site_packages_linux=("nvidia/cu13/lib",), + ctk_companion_tags=("api_cudla",), ), # ----------------------------------------------------------------------- # Third-party / separately packaged libraries @@ -306,6 +357,7 @@ class DescriptorSpec: linux_sonames=("libcublasmp.so.0",), site_packages_linux=("nvidia/cublasmp/cu13/lib", "nvidia/cublasmp/cu12/lib"), dependencies=("cublas", "cublasLt", "nvshmem_host"), + dynamic_link_component="cuda_blas_solver_runtime", ), DescriptorSpec( name="cufftMp", @@ -313,6 +365,7 @@ class DescriptorSpec: linux_sonames=("libcufftMp.so.12", "libcufftMp.so.11"), site_packages_linux=("nvidia/cufftmp/cu13/lib", "nvidia/cufftmp/cu12/lib"), dependencies=("nvshmem_host",), + dynamic_link_component="cufft_nvshmem", requires_rtld_deepbind=True, ), DescriptorSpec( @@ -321,6 +374,8 @@ class DescriptorSpec: linux_sonames=("libcusolverMp.so.0",), site_packages_linux=("nvidia/cu13/lib", "nvidia/cu12/lib"), dependencies=("cublas", "cudart", "cusolver", "nccl"), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_cusolvermp",), ), DescriptorSpec( name="mathdx", @@ -330,6 +385,8 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/cu12/lib"), 
site_packages_windows=("nvidia/cu13/bin", "nvidia/cu12/bin"), dependencies=("nvrtc",), + dynamic_link_component="nvrtc_mathdx", + ctk_companion_tags=("api_mathdx",), ), DescriptorSpec( name="cudss", @@ -339,6 +396,7 @@ class DescriptorSpec: site_packages_linux=("nvidia/cu13/lib", "nvidia/cu12/lib"), site_packages_windows=("nvidia/cu13/bin", "nvidia/cu12/bin"), dependencies=("cublas", "cublasLt"), + dynamic_link_component="cuda_blas_solver_runtime", ), DescriptorSpec( name="cusparseLt", @@ -347,6 +405,7 @@ class DescriptorSpec: windows_dlls=("cusparseLt.dll",), site_packages_linux=("nvidia/cu13/lib", "nvidia/cusparselt/lib"), site_packages_windows=("nvidia/cu13/bin/x64", "nvidia/cusparselt/bin"), + ctk_companion_tags=("api_cusparselt",), ), DescriptorSpec( name="cutensor", @@ -356,6 +415,8 @@ class DescriptorSpec: site_packages_linux=("cutensor/lib",), site_packages_windows=("cutensor/bin",), dependencies=("cublasLt",), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_cutensor",), ), DescriptorSpec( name="cutensorMg", @@ -365,12 +426,15 @@ class DescriptorSpec: site_packages_linux=("cutensor/lib",), site_packages_windows=("cutensor/bin",), dependencies=("cutensor", "cublasLt"), + dynamic_link_component="cuda_blas_solver_runtime", ), DescriptorSpec( name="nccl", packaged_with="other", linux_sonames=("libnccl.so.2",), site_packages_linux=("nvidia/nccl/lib",), + dynamic_link_component="cuda_blas_solver_runtime", + ctk_companion_tags=("api_nccl",), ), DescriptorSpec( name="nvpl_fftw", @@ -383,6 +447,8 @@ class DescriptorSpec: packaged_with="other", linux_sonames=("libnvshmem_host.so.3",), site_packages_linux=("nvidia/nvshmem/lib",), + dynamic_link_component="cufft_nvshmem", + ctk_companion_tags=("api_nvshmem",), ), # ----------------------------------------------------------------------- # Driver libraries (system-search only, no CTK cascade) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py 
b/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py index b64fa56cbb4..68cb0cf28d4 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py @@ -17,6 +17,7 @@ class HeaderDescriptorSpec: packaged_with: HeaderPackagedWith header_basename: str site_packages_dirs: tuple[str, ...] = () + ctk_companion_tags: tuple[str, ...] = () available_on_linux: bool = True available_on_windows: bool = True # Relative path(s) from anchor point to the include directory. @@ -45,6 +46,7 @@ class HeaderDescriptorSpec: "nvidia/cu13/include/cccl", # cuda-toolkit[cccl]==13.* "nvidia/cuda_cccl/include", # cuda-toolkit[cccl]==12.* ), + ctk_companion_tags=("toolchain_cuda_nvcc",), include_subdirs=("cccl",), include_subdirs_windows=("targets/x64/cccl", "targets/x64"), ), @@ -53,24 +55,28 @@ class HeaderDescriptorSpec: packaged_with="ctk", header_basename="cublas.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cublas/include"), + ctk_companion_tags=("api_cublas",), ), HeaderDescriptorSpec( name="cudart", packaged_with="ctk", header_basename="cuda_runtime.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cuda_runtime/include"), + ctk_companion_tags=("api_cudart", "toolchain_cuda_nvcc"), ), HeaderDescriptorSpec( name="cufft", packaged_with="ctk", header_basename="cufft.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cufft/include"), + ctk_companion_tags=("api_cufft",), ), HeaderDescriptorSpec( name="cufile", packaged_with="ctk", header_basename="cufile.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cufile/include"), + ctk_companion_tags=("api_cufile",), available_on_windows=False, ), HeaderDescriptorSpec( @@ -78,24 +84,28 @@ class HeaderDescriptorSpec: packaged_with="ctk", header_basename="curand.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/curand/include"), + ctk_companion_tags=("api_curand",), ), HeaderDescriptorSpec( name="cusolver", 
packaged_with="ctk", header_basename="cusolverDn.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cusolver/include"), + ctk_companion_tags=("api_cusolver",), ), HeaderDescriptorSpec( name="cusparse", packaged_with="ctk", header_basename="cusparse.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cusparse/include"), + ctk_companion_tags=("api_cusparse",), ), HeaderDescriptorSpec( name="npp", packaged_with="ctk", header_basename="npp.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/npp/include"), + ctk_companion_tags=("api_npp",), ), HeaderDescriptorSpec( name="profiler", @@ -108,36 +118,42 @@ class HeaderDescriptorSpec: packaged_with="ctk", header_basename="fatbinary_section.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cuda_nvcc/include"), + ctk_companion_tags=("toolchain_cuda_nvcc",), ), HeaderDescriptorSpec( name="nvfatbin", packaged_with="ctk", header_basename="nvFatbin.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/nvfatbin/include"), + ctk_companion_tags=("api_nvfatbin", "toolchain_cuda_nvcc"), ), HeaderDescriptorSpec( name="nvjitlink", packaged_with="ctk", header_basename="nvJitLink.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/nvjitlink/include"), + ctk_companion_tags=("api_nvjitlink",), ), HeaderDescriptorSpec( name="nvjpeg", packaged_with="ctk", header_basename="nvjpeg.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/nvjpeg/include"), + ctk_companion_tags=("api_nvjpeg",), ), HeaderDescriptorSpec( name="nvrtc", packaged_with="ctk", header_basename="nvrtc.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cuda_nvrtc/include"), + ctk_companion_tags=("api_nvrtc",), ), HeaderDescriptorSpec( name="nvvm", packaged_with="ctk", header_basename="nvvm.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cuda_nvcc/nvvm/include"), + ctk_companion_tags=("api_nvvm", "toolchain_cuda_nvcc"), anchor_include_rel_dirs=("nvvm/include",), ), HeaderDescriptorSpec( @@ -145,6 +161,7 @@ class HeaderDescriptorSpec: 
packaged_with="ctk", header_basename="cudla.h", site_packages_dirs=("nvidia/cu13/include",), + ctk_companion_tags=("api_cudla",), available_on_windows=False, ), # ----------------------------------------------------------------------- @@ -155,6 +172,7 @@ class HeaderDescriptorSpec: packaged_with="other", header_basename="cusolverMp.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cu12/include"), + ctk_companion_tags=("api_cusolvermp",), available_on_windows=False, conda_targets_layout=False, use_ctk_root_canary=False, @@ -164,6 +182,7 @@ class HeaderDescriptorSpec: packaged_with="other", header_basename="cusparseLt.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cusparselt/include"), + ctk_companion_tags=("api_cusparselt",), conda_targets_layout=False, use_ctk_root_canary=False, ), @@ -172,6 +191,7 @@ class HeaderDescriptorSpec: packaged_with="other", header_basename="cute/tensor.hpp", site_packages_dirs=("cutlass_library/source/include",), + ctk_companion_tags=("toolchain_cuda_nvcc",), conda_targets_layout=False, use_ctk_root_canary=False, ), @@ -180,6 +200,7 @@ class HeaderDescriptorSpec: packaged_with="other", header_basename="cutensor.h", site_packages_dirs=("cutensor/include",), + ctk_companion_tags=("api_cutensor",), conda_targets_layout=False, use_ctk_root_canary=False, ), @@ -188,6 +209,7 @@ class HeaderDescriptorSpec: packaged_with="other", header_basename="cutlass/cutlass.h", site_packages_dirs=("cutlass_library/source/include",), + ctk_companion_tags=("toolchain_cuda_nvcc",), conda_targets_layout=False, use_ctk_root_canary=False, ), @@ -196,6 +218,7 @@ class HeaderDescriptorSpec: packaged_with="other", header_basename="libmathdx.h", site_packages_dirs=("nvidia/cu13/include", "nvidia/cu12/include"), + ctk_companion_tags=("api_mathdx",), conda_targets_layout=False, use_ctk_root_canary=False, ), @@ -204,6 +227,7 @@ class HeaderDescriptorSpec: packaged_with="other", header_basename="nvshmem.h", site_packages_dirs=("nvidia/nvshmem/include",), + 
ctk_companion_tags=("api_nvshmem",), available_on_windows=False, system_install_dirs=("/usr/include/nvshmem_*",), conda_targets_layout=False, diff --git a/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py new file mode 100644 index 00000000000..46569660f35 --- /dev/null +++ b/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py @@ -0,0 +1,289 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import os +import sys +from collections.abc import Callable +from typing import Protocol, TypeVar, cast + +from cuda.pathfinder._binaries.find_nvidia_binary_utility import ( + find_nvidia_binary_utility as _find_nvidia_binary_utility, +) +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityGuardRails, + CompatibilityInsufficientMetadataError, + DriverCtkCompatibilityError, +) +from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL +from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import ( + load_nvidia_dynamic_lib as _load_nvidia_dynamic_lib, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + LocatedHeaderDir, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + find_nvidia_header_directory as _find_nvidia_header_directory_impl, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + locate_nvidia_header_directory as _locate_nvidia_header_directory, +) +from cuda.pathfinder._static_libs.find_bitcode_lib import ( + LocatedBitcodeLib, +) +from cuda.pathfinder._static_libs.find_bitcode_lib import ( + find_bitcode_lib as _find_bitcode_lib, +) +from cuda.pathfinder._static_libs.find_bitcode_lib import ( + locate_bitcode_lib as _locate_bitcode_lib, +) +from cuda.pathfinder._static_libs.find_static_lib import ( + LocatedStaticLib, +) +from 
cuda.pathfinder._static_libs.find_static_lib import ( + find_static_lib as _find_static_lib, +) +from cuda.pathfinder._static_libs.find_static_lib import ( + locate_static_lib as _locate_static_lib, +) + +_T = TypeVar("_T") +_COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" +_COMPATIBILITY_GUARD_RAILS_MODES = ("off", "best_effort", "strict") +_COMPATIBILITY_GUARD_RAILS_DEFAULT_MODE = "strict" +assert _COMPATIBILITY_GUARD_RAILS_DEFAULT_MODE in _COMPATIBILITY_GUARD_RAILS_MODES +_DRIVER_COMPATIBILITY_ENV_VAR = "CUDA_PATHFINDER_DRIVER_COMPATIBILITY" +_DRIVER_COMPATIBILITY_MODES = ("default", "assume_forward_compatibility") +_DRIVER_COMPATIBILITY_DEFAULT_MODE = "default" +assert _DRIVER_COMPATIBILITY_DEFAULT_MODE in _DRIVER_COMPATIBILITY_MODES + + +class _ProcessWideGuardRailsApi(Protocol): + def load_nvidia_dynamic_lib(self, libname: str) -> LoadedDL: ... + + def locate_nvidia_header_directory(self, libname: str) -> LocatedHeaderDir | None: ... + + def find_nvidia_header_directory(self, libname: str) -> str | None: ... + + def locate_static_lib(self, name: str) -> LocatedStaticLib: ... + + def find_static_lib(self, name: str) -> str: ... + + def locate_bitcode_lib(self, name: str) -> LocatedBitcodeLib: ... + + def find_bitcode_lib(self, name: str) -> str: ... + + def find_nvidia_binary_utility(self, utility_name: str) -> str | None: ... + + +class _PublicPathfinderModule(Protocol): + process_wide_compatibility_guard_rails: object + + +process_wide_compatibility_guard_rails: CompatibilityGuardRails = CompatibilityGuardRails() + + +def _compatibility_guard_rails_mode() -> str: + value = os.environ.get(_COMPATIBILITY_GUARD_RAILS_ENV_VAR) + if not value: + return _COMPATIBILITY_GUARD_RAILS_DEFAULT_MODE + if value in _COMPATIBILITY_GUARD_RAILS_MODES: + return value + allowed_values = ", ".join(repr(mode) for mode in _COMPATIBILITY_GUARD_RAILS_MODES) + raise RuntimeError( + f"Invalid {_COMPATIBILITY_GUARD_RAILS_ENV_VAR}={value!r}. 
" + f"Allowed values: {allowed_values}. " + f"Unset or empty defaults to {_COMPATIBILITY_GUARD_RAILS_DEFAULT_MODE!r}." + ) + + +def _driver_compatibility_mode() -> str: + """Return the configured driver-compatibility mode after validating its value. + + The platform-specific restriction for ``assume_forward_compatibility`` is + deferred to ``_enforce_driver_compatibility_platform``: if guard rails are + turned off entirely, an unsupported platform should not raise just because + this env var happens to be set. + """ + value = os.environ.get(_DRIVER_COMPATIBILITY_ENV_VAR) + if not value: + return _DRIVER_COMPATIBILITY_DEFAULT_MODE + if value not in _DRIVER_COMPATIBILITY_MODES: + allowed_values = ", ".join(repr(mode) for mode in _DRIVER_COMPATIBILITY_MODES) + raise RuntimeError( + f"Invalid {_DRIVER_COMPATIBILITY_ENV_VAR}={value!r}. " + f"Allowed values: {allowed_values}. " + f"Unset or empty defaults to {_DRIVER_COMPATIBILITY_DEFAULT_MODE!r}." + ) + return value + + +def _enforce_driver_compatibility_platform(driver_compatibility_mode: str) -> None: + if driver_compatibility_mode == "assume_forward_compatibility" and not sys.platform.startswith("linux"): + raise RuntimeError(f"{_DRIVER_COMPATIBILITY_ENV_VAR}={driver_compatibility_mode!r} is only supported on Linux.") + + +def _driver_compatibility_override_hint() -> str: + return ( + "On supported Linux systems that intentionally rely on NVIDIA forward compatibility " + f"(`cuda-compat-*`), set {_DRIVER_COMPATIBILITY_ENV_VAR}=assume_forward_compatibility " + "to bypass this driver-vs-CTK check. This does not relax CTK-coherence checks " + "between headers, libraries, and compiler/JIT components." 
+ ) + + +def _with_driver_compatibility_hint(message: str) -> str: + if _DRIVER_COMPATIBILITY_ENV_VAR in message: + return message + return f"{message} {_driver_compatibility_override_hint()}" + + +def _public_module() -> _PublicPathfinderModule | None: + public_module = sys.modules.get("cuda.pathfinder") + if public_module is None: + return None + return cast(_PublicPathfinderModule, public_module) + + +def _current_process_wide_compatibility_guard_rails() -> _ProcessWideGuardRailsApi: + public_module = _public_module() + if public_module is None: + return cast(_ProcessWideGuardRailsApi, process_wide_compatibility_guard_rails) + return cast(_ProcessWideGuardRailsApi, public_module.process_wide_compatibility_guard_rails) + + +def _reset_process_wide_compatibility_guard_rails() -> None: + current = _current_process_wide_compatibility_guard_rails() + if isinstance(current, CompatibilityGuardRails): + current._reset_state() + return + public_module = _public_module() + if public_module is None: + global process_wide_compatibility_guard_rails + process_wide_compatibility_guard_rails = CompatibilityGuardRails() + return + public_module.process_wide_compatibility_guard_rails = CompatibilityGuardRails() + + +def _try_process_wide_guard_rails_then_fallback(guard_rails_call: Callable[[], _T], raw_call: Callable[[], _T]) -> _T: + driver_compatibility_mode = _driver_compatibility_mode() + mode = _compatibility_guard_rails_mode() + if mode == "off": + return raw_call() + _enforce_driver_compatibility_platform(driver_compatibility_mode) + try: + return guard_rails_call() + except CompatibilityInsufficientMetadataError: + if mode == "best_effort": + return raw_call() + raise + except DriverCtkCompatibilityError as exc: + if driver_compatibility_mode == "assume_forward_compatibility": + return raw_call() + # The forward-compat hint is appended only on Linux because the + # underlying ``cuda-compat-*`` packages (and the + # 
``CUDA_PATHFINDER_DRIVER_COMPATIBILITY=assume_forward_compatibility`` + # override they justify) are NVIDIA's Linux-only forward-compat + # contract; there is no equivalent on Windows / macOS, so suggesting + # the override on those platforms would be misleading. + if sys.platform.startswith("linux"): + raise DriverCtkCompatibilityError(_with_driver_compatibility_hint(str(exc))) from exc + raise + + +def _cache_clear_with_process_state_reset(cache_clear: Callable[[], object]) -> Callable[[], None]: + def clear() -> None: + cache_clear() + _reset_process_wide_compatibility_guard_rails() + + return clear + + +def load_nvidia_dynamic_lib(libname: str) -> LoadedDL: + """Load a CUDA dynamic library via the process-wide compatibility guard rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().load_nvidia_dynamic_lib(libname), + lambda: _load_nvidia_dynamic_lib(libname), + ) + + +def locate_nvidia_header_directory(libname: str) -> LocatedHeaderDir | None: + """Locate a CUDA header directory via the process-wide compatibility guard rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().locate_nvidia_header_directory(libname), + lambda: _locate_nvidia_header_directory(libname), + ) + + +def find_nvidia_header_directory(libname: str) -> str | None: + """Locate a CUDA header directory and return its path string.""" + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_nvidia_header_directory(libname), + lambda: _find_nvidia_header_directory_impl(libname), + ) + assert abs_path is None or isinstance(abs_path, str) + return abs_path + + +def locate_static_lib(name: str) -> LocatedStaticLib: + """Locate a CUDA static library via the process-wide compatibility guard rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: 
_current_process_wide_compatibility_guard_rails().locate_static_lib(name), + lambda: _locate_static_lib(name), + ) + + +def find_static_lib(name: str) -> str: + """Locate a CUDA static library and return its path string.""" + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_static_lib(name), + lambda: _find_static_lib(name), + ) + assert isinstance(abs_path, str) + return abs_path + + +def locate_bitcode_lib(name: str) -> LocatedBitcodeLib: + """Locate a CUDA bitcode library via the process-wide compatibility guard rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().locate_bitcode_lib(name), + lambda: _locate_bitcode_lib(name), + ) + + +def find_bitcode_lib(name: str) -> str: + """Locate a CUDA bitcode library and return its path string.""" + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_bitcode_lib(name), + lambda: _find_bitcode_lib(name), + ) + assert isinstance(abs_path, str) + return abs_path + + +def find_nvidia_binary_utility(utility_name: str) -> str | None: + """Locate a CUDA binary utility via the process-wide compatibility guard rails.""" + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_nvidia_binary_utility(utility_name), + lambda: _find_nvidia_binary_utility(utility_name), + ) + assert abs_path is None or isinstance(abs_path, str) + return abs_path + + +load_nvidia_dynamic_lib.cache_clear = _cache_clear_with_process_state_reset( # type: ignore[attr-defined] + _load_nvidia_dynamic_lib.cache_clear +) +locate_nvidia_header_directory.cache_clear = _cache_clear_with_process_state_reset( # type: ignore[attr-defined] + _locate_nvidia_header_directory.cache_clear +) +_locate_static_lib_cache_clear = _cache_clear_with_process_state_reset(_locate_static_lib.cache_clear) 
+locate_static_lib.cache_clear = _locate_static_lib_cache_clear # type: ignore[attr-defined] +find_static_lib.cache_clear = _locate_static_lib_cache_clear # type: ignore[attr-defined] +_locate_bitcode_lib_cache_clear = _cache_clear_with_process_state_reset(_locate_bitcode_lib.cache_clear) +locate_bitcode_lib.cache_clear = _locate_bitcode_lib_cache_clear # type: ignore[attr-defined] +find_bitcode_lib.cache_clear = _locate_bitcode_lib_cache_clear # type: ignore[attr-defined] +find_nvidia_binary_utility.cache_clear = _cache_clear_with_process_state_reset( # type: ignore[attr-defined] + _find_nvidia_binary_utility.cache_clear +) diff --git a/cuda_pathfinder/cuda/pathfinder/_static_libs/find_bitcode_lib.py b/cuda_pathfinder/cuda/pathfinder/_static_libs/find_bitcode_lib.py index ac038aadfe7..5b40a51d8d3 100644 --- a/cuda_pathfinder/cuda/pathfinder/_static_libs/find_bitcode_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_static_libs/find_bitcode_lib.py @@ -30,6 +30,7 @@ class _BitcodeLibInfo(TypedDict): rel_path: str site_packages_dirs: tuple[str, ...] available_on_windows: bool + ctk_companion_tags: tuple[str, ...] 
_SUPPORTED_BITCODE_LIBS_INFO: dict[str, _BitcodeLibInfo] = { @@ -41,18 +42,21 @@ class _BitcodeLibInfo(TypedDict): "nvidia/cuda_nvcc/nvvm/libdevice", ), "available_on_windows": True, + "ctk_companion_tags": ("toolchain_cuda_nvcc",), }, "nccl_device": { "filename": "libnccl_device.bc", "rel_path": "lib", "site_packages_dirs": ("nvidia/nccl/lib",), "available_on_windows": False, + "ctk_companion_tags": ("api_nccl",), }, "nvshmem_device": { "filename": "libnvshmem_device.bc", "rel_path": "lib", "site_packages_dirs": ("nvidia/nvshmem/lib",), "available_on_windows": False, + "ctk_companion_tags": ("api_nvshmem",), }, } @@ -62,6 +66,9 @@ class _BitcodeLibInfo(TypedDict): name for name, info in _SUPPORTED_BITCODE_LIBS_INFO.items() if not IS_WINDOWS or info["available_on_windows"] ) ) +SUPPORTED_BITCODE_LIBS_CTK_COMPANION_TAGS = { + name: info["ctk_companion_tags"] for name, info in _SUPPORTED_BITCODE_LIBS_INFO.items() +} def _no_such_file_in_dir(dir_path: str, filename: str, error_messages: list[str], attachments: list[str]) -> None: @@ -130,6 +137,7 @@ def raise_not_found_error(self) -> NoReturn: raise BitcodeLibNotFoundError(f'Failure finding "{self.filename}": {err}\n{att}') +@functools.cache def locate_bitcode_lib(name: str) -> LocatedBitcodeLib: """Locate a bitcode library by name. @@ -169,7 +177,6 @@ def locate_bitcode_lib(name: str) -> LocatedBitcodeLib: finder.raise_not_found_error() -@functools.cache def find_bitcode_lib(name: str) -> str: """Find the absolute path to a bitcode library. @@ -178,3 +185,6 @@ def find_bitcode_lib(name: str) -> str: BitcodeLibNotFoundError: If the bitcode library cannot be found. 
""" return locate_bitcode_lib(name).abs_path + + +find_bitcode_lib.cache_clear = locate_bitcode_lib.cache_clear # type: ignore[attr-defined] diff --git a/cuda_pathfinder/cuda/pathfinder/_static_libs/find_static_lib.py b/cuda_pathfinder/cuda/pathfinder/_static_libs/find_static_lib.py index 22cea7daad8..631f4557f1f 100644 --- a/cuda_pathfinder/cuda/pathfinder/_static_libs/find_static_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_static_libs/find_static_lib.py @@ -28,24 +28,29 @@ class LocatedStaticLib: class _StaticLibInfo(TypedDict): filename: str ctk_rel_paths: tuple[str, ...] - conda_rel_path: str + conda_rel_paths: tuple[str, ...] site_packages_dirs: tuple[str, ...] + ctk_companion_tags: tuple[str, ...] _SUPPORTED_STATIC_LIBS_INFO: dict[str, _StaticLibInfo] = { "cudadevrt": { "filename": "cudadevrt.lib" if IS_WINDOWS else "libcudadevrt.a", "ctk_rel_paths": (os.path.join("lib", "x64"),) if IS_WINDOWS else ("lib64", "lib"), - "conda_rel_path": os.path.join("lib", "x64") if IS_WINDOWS else "lib", + "conda_rel_paths": ((os.path.join("lib", "x64"), "lib") if IS_WINDOWS else ("lib",)), "site_packages_dirs": ( ("nvidia/cu13/lib/x64", "nvidia/cuda_runtime/lib/x64") if IS_WINDOWS else ("nvidia/cu13/lib", "nvidia/cuda_runtime/lib") ), + "ctk_companion_tags": ("toolchain_cuda_nvcc",), }, } SUPPORTED_STATIC_LIBS: tuple[str, ...] = tuple(sorted(_SUPPORTED_STATIC_LIBS_INFO.keys())) +SUPPORTED_STATIC_LIBS_CTK_COMPANION_TAGS = { + name: info["ctk_companion_tags"] for name, info in _SUPPORTED_STATIC_LIBS_INFO.items() +} def _no_such_file_in_dir(dir_path: str, filename: str, error_messages: list[str], attachments: list[str]) -> None: @@ -66,7 +71,7 @@ def __init__(self, name: str) -> None: self.config: _StaticLibInfo = _SUPPORTED_STATIC_LIBS_INFO[name] self.filename: str = self.config["filename"] self.ctk_rel_paths: tuple[str, ...] = self.config["ctk_rel_paths"] - self.conda_rel_path: str = self.config["conda_rel_path"] + self.conda_rel_paths: tuple[str, ...] 
= self.config["conda_rel_paths"] self.site_packages_dirs: tuple[str, ...] = self.config["site_packages_dirs"] self.error_messages: list[str] = [] self.attachments: list[str] = [] @@ -86,9 +91,10 @@ def try_with_conda_prefix(self) -> str | None: return None anchor = os.path.join(conda_prefix, "Library") if IS_WINDOWS else conda_prefix - file_path = os.path.join(anchor, self.conda_rel_path, self.filename) - if os.path.isfile(file_path): - return file_path + for rel_path in self.conda_rel_paths: + file_path = os.path.join(anchor, rel_path, self.filename) + if os.path.isfile(file_path): + return file_path return None def try_with_cuda_home(self) -> str | None: @@ -116,6 +122,7 @@ def raise_not_found_error(self) -> NoReturn: raise StaticLibNotFoundError(f'Failure finding "{self.filename}": {err}\n{att}') +@functools.cache def locate_static_lib(name: str) -> LocatedStaticLib: """Locate a static library by name. @@ -155,7 +162,6 @@ def locate_static_lib(name: str) -> LocatedStaticLib: finder.raise_not_found_error() -@functools.cache def find_static_lib(name: str) -> str: """Find the absolute path to a static library. @@ -164,3 +170,6 @@ def find_static_lib(name: str) -> str: StaticLibNotFoundError: If the static library cannot be found. 
""" return locate_static_lib(name).abs_path + + +find_static_lib.cache_clear = locate_static_lib.cache_clear # type: ignore[attr-defined] diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py b/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py index a5d4d167d33..fdca2c9ac7b 100644 --- a/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py +++ b/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py @@ -5,21 +5,33 @@ import ctypes import functools +import re +import sys from collections.abc import Callable from dataclasses import dataclass +from typing import cast from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import ( load_nvidia_dynamic_lib as _load_nvidia_dynamic_lib, ) from cuda.pathfinder._utils.platform_aware import IS_WINDOWS +from cuda.pathfinder._utils.toolkit_info import EncodedCudaVersion + +_NVML_SUCCESS = 0 +_NVML_SYSTEM_DRIVER_VERSION_BUFFER_LENGTH = 80 +_DRIVER_RELEASE_VERSION_RE = re.compile(r"^\d+(?:\.\d+){1,2}$") class QueryDriverCudaVersionError(RuntimeError): """Raised when ``query_driver_cuda_version()`` cannot determine the CUDA driver version.""" +class QueryDriverReleaseVersionError(RuntimeError): + """Raised when ``query_driver_release_version()`` cannot determine the display-driver release version.""" + + @dataclass(frozen=True, slots=True) -class DriverCudaVersion: +class DriverCudaVersion(EncodedCudaVersion): """ CUDA-facing driver version reported by ``cuDriverGetVersion()``. @@ -41,9 +53,36 @@ class DriverCudaVersion: to ``Driver Version: 595.58.03``. """ - encoded: int - major: int - minor: int + +@dataclass(frozen=True, slots=True) +class DriverReleaseVersion: + """ + Display-driver release version shown as ``Driver Version`` in ``nvidia-smi``. 
+ + Example ``nvidia-smi`` output:: + + +---------------------------------------------------------------------+ + | NVIDIA-SMI 595.58.03 Driver Version: 595.58.03 CUDA Version: 13.2 | + +---------------------------------------------------------------------+ + + For the example above, ``DriverReleaseVersion(text="595.58.03", + components=(595, 58, 3), branch=595)`` corresponds to ``Driver Version: + 595.58.03``. The ``branch`` field is the first numeric component because + NVIDIA's compatibility docs publish minimum display-driver requirements in + branch form such as ``>= 580`` for CUDA 13.x minor-version compatibility. + """ + + text: str + components: tuple[int, ...] + branch: int + + @classmethod + def from_text(cls, text: str) -> DriverReleaseVersion: + normalized_text = text.strip() + if not _DRIVER_RELEASE_VERSION_RE.fullmatch(normalized_text): + raise ValueError(f"Invalid driver release version text: {text!r}") + components = tuple(int(component) for component in normalized_text.split(".")) + return cls(text=normalized_text, components=components, branch=components[0]) @functools.cache @@ -51,15 +90,20 @@ def query_driver_cuda_version() -> DriverCudaVersion: """Return the CUDA driver version parsed into its major/minor components.""" try: encoded = _query_driver_cuda_version_int() - return DriverCudaVersion( - encoded=encoded, - major=encoded // 1000, - minor=(encoded % 1000) // 10, - ) + return cast(DriverCudaVersion, DriverCudaVersion.from_encoded(encoded)) except Exception as exc: raise QueryDriverCudaVersionError("Failed to query the CUDA driver version.") from exc +@functools.cache +def query_driver_release_version() -> DriverReleaseVersion: + """Return the display-driver release version parsed into branch/components.""" + try: + return DriverReleaseVersion.from_text(_query_driver_release_version_text()) + except Exception as exc: + raise QueryDriverReleaseVersionError("Failed to query the display-driver release version.") from exc + + def 
_query_driver_cuda_version_int() -> int: """Return the encoded CUDA driver version from ``cuDriverGetVersion()``.""" loaded_cuda = _load_nvidia_dynamic_lib("cuda") @@ -78,3 +122,47 @@ def _query_driver_cuda_version_int() -> int: if status != 0: raise RuntimeError(f"Failed to query CUDA driver version via cuDriverGetVersion() (status={status}).") return version.value + + +def _query_driver_release_version_text() -> str: + """Return the display-driver release version from ``nvmlSystemGetDriverVersion()``.""" + loaded_nvml = _load_nvidia_dynamic_lib("nvml") + nvml_lib = ctypes.CDLL(loaded_nvml.abs_path) + + nvml_init_v2 = nvml_lib.nvmlInit_v2 + nvml_init_v2.argtypes = [] + nvml_init_v2.restype = ctypes.c_int + + nvml_system_get_driver_version = nvml_lib.nvmlSystemGetDriverVersion + nvml_system_get_driver_version.argtypes = [ctypes.POINTER(ctypes.c_char), ctypes.c_uint] + nvml_system_get_driver_version.restype = ctypes.c_int + + nvml_shutdown = nvml_lib.nvmlShutdown + nvml_shutdown.argtypes = [] + nvml_shutdown.restype = ctypes.c_int + + # NVML's init/shutdown pair is reference-counted (see "Initialization and + # Cleanup" in the NVML API docs), so this balanced pair is safe even when + # the caller has already initialized NVML elsewhere in the process. + init_status = nvml_init_v2() + if init_status != _NVML_SUCCESS: + raise RuntimeError(f"Failed to initialize NVML via nvmlInit_v2() (status={init_status}).") + + try: + version_buffer = ctypes.create_string_buffer(_NVML_SYSTEM_DRIVER_VERSION_BUFFER_LENGTH) + status = nvml_system_get_driver_version(version_buffer, _NVML_SYSTEM_DRIVER_VERSION_BUFFER_LENGTH) + if status != _NVML_SUCCESS: + raise RuntimeError( + f"Failed to query driver release version via nvmlSystemGetDriverVersion() (status={status})." + ) + release_version = version_buffer.value.decode() + finally: + # Balance the init_v2() above unconditionally. 
If the body already + raised, let that error win; a non-zero shutdown status here would + only mask the more useful root cause (Python keeps it on + ``__context__`` for debugging). ``sys.exc_info()[1]`` is the + currently-propagating exception inside the finally, or None. + shutdown_status = nvml_shutdown() + if shutdown_status != _NVML_SUCCESS and sys.exc_info()[1] is None: + raise RuntimeError(f"Failed to shut down NVML via nvmlShutdown() (status={shutdown_status}).") + return release_version diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py b/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py new file mode 100644 index 00000000000..431727bf4bb --- /dev/null +++ b/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import functools +import re +from dataclasses import dataclass +from pathlib import Path +from typing import TypeVar + +_CUDA_VERSION_RE = re.compile(r"^\s*#\s*define\s+CUDA_VERSION\s+(?P<encoded>\d+)\b", re.MULTILINE) +EncodedCudaVersionT = TypeVar("EncodedCudaVersionT", bound="EncodedCudaVersion") + + +@dataclass(frozen=True, slots=True) +class EncodedCudaVersion: + """CUDA major/minor version represented in CUDA's integer ``encoded`` form.""" + + encoded: int + major: int + minor: int + + @classmethod + def from_encoded(cls: type[EncodedCudaVersionT], encoded: int | str) -> EncodedCudaVersionT: + if isinstance(encoded, str): + try: + encoded_int = int(encoded) + except ValueError as exc: + raise ValueError( + f"{cls.__name__}.from_encoded() expected an integer or decimal string, got {encoded!r}." + ) from exc + elif isinstance(encoded, int): + encoded_int = encoded + else: + raise TypeError( + f"{cls.__name__}.from_encoded() expected an integer or decimal string, got {type(encoded).__name__}."
+ ) + if encoded_int < 0: + raise ValueError( + f"{cls.__name__}.from_encoded() expected a non-negative encoded CUDA version, got {encoded_int}." + ) + # CUDA encodes versions as major * 1000 + minor * 10. The least-significant + # decimal is ignored here: it is 0 in all CUDA releases and is not a patch version. + return cls( + encoded=encoded_int, + major=encoded_int // 1000, + minor=(encoded_int % 1000) // 10, + ) + + +class ReadCudaHeaderVersionError(RuntimeError): + """Raised when ``read_cuda_header_version()`` cannot determine the CTK version from ``cuda.h``.""" + + +@dataclass(frozen=True, slots=True) +class CudaToolkitVersion(EncodedCudaVersion): + """CUDA Toolkit version encoded by the ``CUDA_VERSION`` macro in ``cuda.h``.""" + + +def parse_cuda_header_version(header_text: str) -> CudaToolkitVersion | None: + """Parse the CUDA Toolkit major/minor version from ``cuda.h`` text.""" + match = _CUDA_VERSION_RE.search(header_text) + if match is None: + return None + return CudaToolkitVersion.from_encoded(match.group("encoded")) + + +@functools.cache +def read_cuda_header_version(cuda_header_path: str) -> CudaToolkitVersion: + """Read and parse the CUDA Toolkit major/minor version from ``cuda.h``.""" + try: + header_text = Path(cuda_header_path).read_text(encoding="utf-8", errors="replace") + version = parse_cuda_header_version(header_text) + if version is None: + raise RuntimeError(f"{cuda_header_path!r} does not define CUDA_VERSION.") + return version + except Exception as exc: + raise ReadCudaHeaderVersionError( + f"Failed to read the CUDA Toolkit version from cuda.h at {cuda_header_path!r}." + ) from exc diff --git a/cuda_pathfinder/docs/source/api.rst b/cuda_pathfinder/docs/source/api.rst index e49478c09ec..45c815ec472 100644 --- a/cuda_pathfinder/docs/source/api.rst +++ b/cuda_pathfinder/docs/source/api.rst @@ -18,6 +18,11 @@ CUDA bitcode and static libraries. 
get_cuda_path_or_home + CompatibilityGuardRails + process_wide_compatibility_guard_rails + CompatibilityCheckError + CompatibilityInsufficientMetadataError + DriverCtkCompatibilityError SUPPORTED_NVIDIA_LIBNAMES load_nvidia_dynamic_lib LoadedDL diff --git a/cuda_pathfinder/pixi.lock b/cuda_pathfinder/pixi.lock index 767d3000ffa..2eec9b00dbb 100644 --- a/cuda_pathfinder/pixi.lock +++ b/cuda_pathfinder/pixi.lock @@ -1366,6 +1366,7 @@ packages: variants: target_platform: noarch depends: + - packaging - python >=3.10 - python * license: Apache-2.0 diff --git a/cuda_pathfinder/pixi.toml b/cuda_pathfinder/pixi.toml index 7ebcc9644d7..2713d19ca7b 100644 --- a/cuda_pathfinder/pixi.toml +++ b/cuda_pathfinder/pixi.toml @@ -70,6 +70,7 @@ setuptools = ">=80" setuptools-scm = ">=8" [package.run-dependencies] +packaging = "*" python = ">=3.10" [target.linux.tasks.test] diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml index 7d96e720232..59b5a20f29f 100644 --- a/cuda_pathfinder/pyproject.toml +++ b/cuda_pathfinder/pyproject.toml @@ -8,7 +8,7 @@ authors = [{ name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.co license = "Apache-2.0" requires-python = ">=3.10" dynamic = ["version", "readme"] -dependencies = [] +dependencies = ["packaging"] [dependency-groups] test = [ diff --git a/cuda_pathfinder/tests/compatibility_guard_rails_test_utils.py b/cuda_pathfinder/tests/compatibility_guard_rails_test_utils.py new file mode 100644 index 00000000000..b1ca7887557 --- /dev/null +++ b/cuda_pathfinder/tests/compatibility_guard_rails_test_utils.py @@ -0,0 +1,212 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import importlib +import os +from pathlib import Path + +import pytest +from local_helpers import ( + have_distribution, + locate_real_cuda_toolkit_version_from_cuda_h, +) + +import cuda.pathfinder._compatibility_guard_rails as compatibility_module +from cuda.pathfinder import LoadedDL, LocatedBitcodeLib, LocatedStaticLib +from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import _resolve_system_loaded_abs_path_in_subprocess +from cuda.pathfinder._headers.find_nvidia_headers import ( + locate_nvidia_header_directory as locate_nvidia_header_directory_raw, +) +from cuda.pathfinder._utils import driver_info +from cuda.pathfinder._utils.driver_info import DriverCudaVersion, DriverReleaseVersion +from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home +from cuda.pathfinder._utils.toolkit_info import read_cuda_header_version + +STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_COMPATIBILITY_GUARD_RAILS_STRICTNESS", "see_what_works") +assert STRICTNESS in ("see_what_works", "all_must_work") +COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" +DRIVER_COMPATIBILITY_ENV_VAR = "CUDA_PATHFINDER_DRIVER_COMPATIBILITY" +process_wide_module = importlib.import_module("cuda.pathfinder._process_wide_compatibility_guard_rails") + + +@pytest.fixture(autouse=True) +def _default_process_wide_guard_rails_mode(monkeypatch): + monkeypatch.delenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, raising=False) + monkeypatch.delenv(DRIVER_COMPATIBILITY_ENV_VAR, raising=False) + + +@pytest.fixture +def clear_real_host_probe_caches(): + have_distribution.cache_clear() + locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() + locate_nvidia_header_directory_raw.cache_clear() + _resolve_system_loaded_abs_path_in_subprocess.cache_clear() + get_cuda_path_or_home.cache_clear() + read_cuda_header_version.cache_clear() + driver_info._load_nvidia_dynamic_lib.cache_clear() + 
driver_info.query_driver_cuda_version.cache_clear() + driver_info.query_driver_release_version.cache_clear() + yield + have_distribution.cache_clear() + locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() + locate_nvidia_header_directory_raw.cache_clear() + _resolve_system_loaded_abs_path_in_subprocess.cache_clear() + get_cuda_path_or_home.cache_clear() + read_cuda_header_version.cache_clear() + driver_info._load_nvidia_dynamic_lib.cache_clear() + driver_info.query_driver_cuda_version.cache_clear() + driver_info.query_driver_release_version.cache_clear() + + +def _write_cuda_h( + ctk_root: Path, + toolkit_version: str, + *, + include_dir_parts: tuple[str, ...] = ("targets", "x86_64-linux", "include"), +) -> None: + parts = toolkit_version.split(".") + if len(parts) < 2: + raise AssertionError(f"Expected at least major.minor in toolkit version, got {toolkit_version!r}") + encoded = int(parts[0]) * 1000 + int(parts[1]) * 10 + cuda_h_path = ctk_root.joinpath(*include_dir_parts, "cuda.h") + cuda_h_path.parent.mkdir(parents=True, exist_ok=True) + cuda_h_path.write_text( + f"#ifndef CUDA_H\n#define CUDA_H\n#define CUDA_VERSION {encoded}\n#endif\n", + encoding="utf-8", + ) + + +def _make_ctk_root( + ctk_root: Path, + toolkit_version: str, + *, + include_dir_parts: tuple[str, ...] = ("targets", "x86_64-linux", "include"), +) -> Path: + _write_cuda_h(ctk_root, toolkit_version, include_dir_parts=include_dir_parts) + return ctk_root + + +def _touch(path: Path) -> str: + path.parent.mkdir(parents=True, exist_ok=True) + path.touch() + return str(path) + + +def _touch_ctk_file( + ctk_root: Path, + toolkit_version: str, + relative_path: str | Path, + *, + include_dir_parts: tuple[str, ...] 
= ("targets", "x86_64-linux", "include"), +) -> str: + _make_ctk_root(ctk_root, toolkit_version, include_dir_parts=include_dir_parts) + return _touch(ctk_root / Path(relative_path)) + + +def _loaded_dl(abs_path: str, *, found_via: str = "CUDA_PATH") -> LoadedDL: + return LoadedDL( + abs_path=abs_path, + was_already_loaded_from_elsewhere=False, + _handle_uint=1, + found_via=found_via, + ) + + +def _patch_dynamic_lib_loader(monkeypatch, **loaded_by_libname: LoadedDL) -> None: + def fake_load_nvidia_dynamic_lib(libname: str) -> LoadedDL: + loaded = loaded_by_libname.get(libname) + if loaded is None: + raise AssertionError(f"Unexpected libname: {libname!r}") + return loaded + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", fake_load_nvidia_dynamic_lib) + + +def _located_static_lib(name: str, abs_path: str) -> LocatedStaticLib: + return LocatedStaticLib( + name=name, + abs_path=abs_path, + filename=os.path.basename(abs_path), + found_via="CUDA_PATH", + ) + + +def _located_bitcode_lib(name: str, abs_path: str) -> LocatedBitcodeLib: + return LocatedBitcodeLib( + name=name, + abs_path=abs_path, + filename=os.path.basename(abs_path), + found_via="CUDA_PATH", + ) + + +def _driver_cuda_version(encoded: int) -> DriverCudaVersion: + return DriverCudaVersion.from_encoded(encoded) + + +def _driver_release_version(text: str) -> DriverReleaseVersion: + return DriverReleaseVersion.from_text(text) + + +class _FakeDistribution: + def __init__( + self, + *, + name: str, + version: str, + root: Path, + files: tuple[str, ...] = (), + requires: tuple[str, ...] 
= (), + ) -> None: + self.metadata = {"Name": name} + self.version = version + self.files = tuple(Path(file) for file in files) + self.requires = list(requires) + self._root = root + + def locate_file(self, file: Path) -> Path: + return self._root / file + + +def _assert_real_ctk_backed_path(path: str) -> None: + norm_path = os.path.normpath(os.path.abspath(path)) + if "site-packages" in Path(norm_path).parts: + return + current = Path(norm_path) + if current.is_file(): + current = current.parent + for candidate in (current, *current.parents): + if (candidate / "include" / "cuda.h").is_file(): + return + if any(path.is_file() for path in (candidate / "targets").glob("*/include/cuda.h")): + return + for env_var in ("CUDA_PATH", "CUDA_HOME"): + ctk_root = os.environ.get(env_var) + if not ctk_root: + continue + norm_ctk_root = os.path.normpath(os.path.abspath(ctk_root)) + if os.path.commonpath((norm_path, norm_ctk_root)) == norm_ctk_root: + return + raise AssertionError( + "Expected a site-packages path, a path under a CTK root with cuda.h, " + f"or a path under CUDA_PATH/CUDA_HOME, got {path!r}" + ) + + +class _DelegatingProcessWideGuardRails: + def __init__(self, method_name: str, return_value: object) -> None: + self._method_name = method_name + self._return_value = return_value + self.calls: list[tuple[str, tuple[object, ...]]] = [] + + def __getattr__(self, name: str): + if name != self._method_name: + raise AttributeError(name) + + def delegated(*args: object) -> object: + self.calls.append((name, args)) + return self._return_value + + return delegated diff --git a/cuda_pathfinder/tests/conftest.py b/cuda_pathfinder/tests/conftest.py index e8a5e11b391..583794def8b 100644 --- a/cuda_pathfinder/tests/conftest.py +++ b/cuda_pathfinder/tests/conftest.py @@ -31,6 +31,11 @@ def _append(message): return _append +@pytest.fixture +def disable_process_wide_compatibility_guard_rails(monkeypatch): + monkeypatch.setenv("CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS", "off") + + 
def skip_if_missing_libnvcudla_so(libname: str, *, timeout: float) -> None: if libname not in ("cudla", "nvcudla"): return diff --git a/cuda_pathfinder/tests/local_helpers.py b/cuda_pathfinder/tests/local_helpers.py index 7893ba8229f..bfcfbe207c9 100644 --- a/cuda_pathfinder/tests/local_helpers.py +++ b/cuda_pathfinder/tests/local_helpers.py @@ -4,6 +4,26 @@ import functools import importlib.metadata import re +from dataclasses import dataclass +from pathlib import Path + +import pytest + +from cuda.pathfinder._headers.find_nvidia_headers import ( + locate_nvidia_header_directory as locate_nvidia_header_directory_raw, +) +from cuda.pathfinder._utils import driver_info +from cuda.pathfinder._utils.toolkit_info import CudaToolkitVersion, read_cuda_header_version + + +@dataclass(frozen=True, slots=True) +class LocatedRealCudaToolkitVersion: + """Real-host CTK version discovered from ``cuda.h`` next to resolved ``cudart`` headers.""" + + version: CudaToolkitVersion + cuda_h_path: str + header_dir: str + found_via: str @functools.cache @@ -14,3 +34,46 @@ def have_distribution(name_pattern: str) -> bool: for dist in importlib.metadata.distributions() if "Name" in dist.metadata ) + + +@functools.cache +def locate_real_cuda_toolkit_version_from_cuda_h() -> LocatedRealCudaToolkitVersion | None: + """Return the real-host CTK version from ``cuda.h`` if ``cudart`` headers can be located.""" + located = locate_nvidia_header_directory_raw("cudart") + if located is None or located.abs_path is None: + return None + cuda_h_path = Path(located.abs_path) / "cuda.h" + if not cuda_h_path.is_file(): + return None + return LocatedRealCudaToolkitVersion( + version=read_cuda_header_version(str(cuda_h_path)), + cuda_h_path=str(cuda_h_path), + header_dir=located.abs_path, + found_via=located.found_via, + ) + + +def require_real_cuda_toolkit_version_from_cuda_h() -> LocatedRealCudaToolkitVersion: + """Return the real-host CTK version from ``cuda.h`` or skip if it cannot be located.""" + 
located = locate_nvidia_header_directory_raw("cudart") + if located is None or located.abs_path is None: + pytest.skip("Could not locate cudart headers, so could not find cuda.h for a real CTK installation.") + cuda_h_path = Path(located.abs_path) / "cuda.h" + if not cuda_h_path.is_file(): + pytest.skip( + f"Located cudart headers via {located.found_via} at {located.abs_path!r}, but could not find cuda.h." + ) + return LocatedRealCudaToolkitVersion( + version=read_cuda_header_version(str(cuda_h_path)), + cuda_h_path=str(cuda_h_path), + header_dir=located.abs_path, + found_via=located.found_via, + ) + + +def require_real_driver_cuda_version() -> driver_info.DriverCudaVersion: + """Return the real-host CUDA driver version or skip if it cannot be queried.""" + try: + return driver_info.query_driver_cuda_version() + except driver_info.QueryDriverCudaVersionError as exc: + pytest.skip(f"Could not query the CUDA driver version for a real driver installation: {exc}") diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py new file mode 100644 index 00000000000..3d6af29586d --- /dev/null +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -0,0 +1,662 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pytest +from compatibility_guard_rails_test_utils import ( + _default_process_wide_guard_rails_mode, # noqa: F401 + _driver_cuda_version, + _driver_release_version, + _FakeDistribution, + _loaded_dl, + _located_bitcode_lib, + _located_static_lib, + _make_ctk_root, + _patch_dynamic_lib_loader, + _touch, + _touch_ctk_file, + _write_cuda_h, +) +from packaging.specifiers import SpecifierSet + +import cuda.pathfinder._compatibility_guard_rails as compatibility_module +from cuda.pathfinder import ( + CompatibilityCheckError, + CompatibilityGuardRails, + CompatibilityInsufficientMetadataError, + DriverCtkCompatibilityError, + LocatedHeaderDir, +) +from cuda.pathfinder._binaries.supported_nvidia_binaries import SUPPORTED_BINARIES_ALL +from cuda.pathfinder._static_libs.find_bitcode_lib import SUPPORTED_BITCODE_LIBS +from cuda.pathfinder._static_libs.find_static_lib import SUPPORTED_STATIC_LIBS +from cuda.pathfinder._utils.driver_info import ( + DriverCudaVersion, + DriverReleaseVersion, + QueryDriverCudaVersionError, + QueryDriverReleaseVersionError, +) + + +def test_same_dynamic_link_component_requires_exact_ctk_major_minor_match(monkeypatch, tmp_path): + cublas_path = _touch_ctk_file(tmp_path / "cuda-12.8", "12.8.20250303", "targets/x86_64-linux/lib/libcublas.so.12") + cusolver_path = _touch_ctk_file( + tmp_path / "cuda-12.9", + "12.9.20250531", + "targets/x86_64-linux/lib/libcusolver.so.12", + ) + + _patch_dynamic_lib_loader( + monkeypatch, + cublas=_loaded_dl(cublas_path), + cusolver=_loaded_dl(cusolver_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + guard_rails.load_nvidia_dynamic_lib("cublas") + + with pytest.raises( + CompatibilityCheckError, + match=r"dynamic-link component 'cuda_blas_solver_runtime'", + ): + guard_rails.load_nvidia_dynamic_lib("cusolver") + + +def 
test_independent_dynamic_libs_may_resolve_to_different_ctk_minors(monkeypatch, tmp_path): + nvrtc_path = _touch_ctk_file(tmp_path / "cuda-12.8", "12.8.20250303", "targets/x86_64-linux/lib/libnvrtc.so.12") + nvjitlink_path = _touch_ctk_file( + tmp_path / "cuda-12.9", + "12.9.20250531", + "targets/x86_64-linux/lib/libnvJitLink.so.12", + ) + + _patch_dynamic_lib_loader( + monkeypatch, + nvrtc=_loaded_dl(nvrtc_path), + nvJitLink=_loaded_dl(nvjitlink_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + loaded_nvrtc = guard_rails.load_nvidia_dynamic_lib("nvrtc") + loaded_nvjitlink = guard_rails.load_nvidia_dynamic_lib("nvJitLink") + + assert loaded_nvrtc.abs_path == nvrtc_path + assert loaded_nvjitlink.abs_path == nvjitlink_path + + +def test_toolchain_companions_require_exact_ctk_major_minor_match(monkeypatch, tmp_path): + static_path = _touch_ctk_file(tmp_path / "cuda-12.8", "12.8.20250303", "targets/x86_64-linux/lib/libcudadevrt.a") + binary_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "bin/nvcc") + + monkeypatch.setattr( + compatibility_module, + "_locate_static_lib", + lambda _name: _located_static_lib("cudadevrt", static_path), + ) + monkeypatch.setattr( + compatibility_module, + "_find_nvidia_binary_utility", + lambda _utility_name: binary_path, + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + assert guard_rails.find_static_lib("cudadevrt") == static_path + + with pytest.raises( + CompatibilityCheckError, + match=r"companion tag 'toolchain_cuda_nvcc'", + ): + guard_rails.find_nvidia_binary_utility("nvcc") + + +def test_declared_ltoir_pipeline_requires_nvjitlink_not_older_than_nvrtc(monkeypatch, tmp_path): + nvrtc_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + nvjitlink_path = _touch_ctk_file( + tmp_path / "cuda-12.8", + "12.8.20250303", + "targets/x86_64-linux/lib/libnvJitLink.so.12", 
+ ) + + _patch_dynamic_lib_loader( + monkeypatch, + nvrtc=_loaded_dl(nvrtc_path), + nvJitLink=_loaded_dl(nvjitlink_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + guard_rails.load_nvidia_dynamic_lib("nvrtc") + guard_rails._declare_dynamic_lib_pipeline( + producer_libname="nvrtc", + consumer_libname="nvJitLink", + artifact_kind="ltoir", + ) + + with pytest.raises(CompatibilityCheckError, match=r"nvJitLink must be >= the producer version"): + guard_rails.load_nvidia_dynamic_lib("nvJitLink") + + +def test_declared_ltoir_pipeline_allows_same_major_newer_nvjitlink(monkeypatch, tmp_path): + nvrtc_path = _touch_ctk_file(tmp_path / "cuda-12.8", "12.8.20250303", "targets/x86_64-linux/lib/libnvrtc.so.12") + nvjitlink_path = _touch_ctk_file( + tmp_path / "cuda-12.9", + "12.9.20250531", + "targets/x86_64-linux/lib/libnvJitLink.so.12", + ) + + _patch_dynamic_lib_loader( + monkeypatch, + nvrtc=_loaded_dl(nvrtc_path), + nvJitLink=_loaded_dl(nvjitlink_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + loaded_nvrtc = guard_rails.load_nvidia_dynamic_lib("nvrtc") + loaded_nvjitlink = guard_rails.load_nvidia_dynamic_lib("nvJitLink") + guard_rails._declare_dynamic_lib_pipeline( + producer_libname="nvrtc", + consumer_libname="nvJitLink", + artifact_kind="ltoir", + ) + + assert loaded_nvrtc.abs_path == nvrtc_path + assert loaded_nvjitlink.abs_path == nvjitlink_path + + +@pytest.mark.parametrize("artifact_kind", ("ptx", "elf", "cubin")) +def test_declared_non_lto_pipeline_allows_cross_major_nvrtc_to_nvjitlink(monkeypatch, tmp_path, artifact_kind): + nvrtc_path = _touch_ctk_file(tmp_path / "cuda-12.8", "12.8.20250303", "targets/x86_64-linux/lib/libnvrtc.so.12") + nvjitlink_path = _touch_ctk_file( + tmp_path / "cuda-13.0", + "13.0.20251003", + "targets/x86_64-linux/lib/libnvJitLink.so.13", + ) + + _patch_dynamic_lib_loader( + monkeypatch, + nvrtc=_loaded_dl(nvrtc_path), + 
nvJitLink=_loaded_dl(nvjitlink_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + loaded_nvrtc = guard_rails.load_nvidia_dynamic_lib("nvrtc") + loaded_nvjitlink = guard_rails.load_nvidia_dynamic_lib("nvJitLink") + guard_rails._declare_dynamic_lib_pipeline( + producer_libname="nvrtc", + consumer_libname="nvJitLink", + artifact_kind=artifact_kind, + ) + + assert loaded_nvrtc.abs_path == nvrtc_path + assert loaded_nvjitlink.abs_path == nvjitlink_path + + +def test_declared_nvvm_pipeline_remains_conservative(monkeypatch, tmp_path): + nvvm_path = _touch_ctk_file(tmp_path / "cuda-12.8", "12.8.20250303", "nvvm/lib64/libnvvm.so.4") + nvjitlink_path = _touch_ctk_file( + tmp_path / "cuda-12.9", + "12.9.20250531", + "targets/x86_64-linux/lib/libnvJitLink.so.12", + ) + + _patch_dynamic_lib_loader( + monkeypatch, + nvvm=_loaded_dl(nvvm_path), + nvJitLink=_loaded_dl(nvjitlink_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + guard_rails.load_nvidia_dynamic_lib("nvvm") + guard_rails.load_nvidia_dynamic_lib("nvJitLink") + + with pytest.raises( + CompatibilityCheckError, + match=r"remains conservative for explicit nvvm pipeline contexts", + ): + guard_rails._declare_dynamic_lib_pipeline( + producer_libname="nvvm", + consumer_libname="nvJitLink", + artifact_kind="ptx", + ) + + +def test_declared_dynamic_lib_pipeline_rejects_invalid_artifact_kind(): + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + with pytest.raises(ValueError, match="Invalid pipeline artifact kind"): + guard_rails._declare_dynamic_lib_pipeline( + producer_libname="nvrtc", + consumer_libname="nvJitLink", + artifact_kind="fatbin", + ) + + +def test_driver_major_must_not_be_older_than_ctk_major(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-13.0", "13.0.20251003", "targets/x86_64-linux/lib/libnvrtc.so.13") + + 
monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(12080)) + + with pytest.raises(CompatibilityCheckError, match="driver_major >= ctk_major"): + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + +def test_missing_cuda_h_raises_insufficient_metadata(monkeypatch, tmp_path): + lib_path = _touch(tmp_path / "no-cuda-h" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + with pytest.raises(CompatibilityInsufficientMetadataError, match="cuda.h"): + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + +def test_windows_style_ctk_root_uses_root_include_cuda_h(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-13.2" + _write_cuda_h(ctk_root, "13.2.20251003", include_dir_parts=("include",)) + lib_path = _touch(ctk_root / "bin" / "x64" / "nvrtc64_130_0.dll") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + + +def test_other_packaging_raises_insufficient_metadata(monkeypatch, tmp_path): + abs_path = _touch(tmp_path / "site-packages" / "nvidia" / "nvshmem" / "lib" / "libnvshmem_device.bc") + + monkeypatch.setattr( + compatibility_module, + "_locate_bitcode_lib", + lambda _name: _located_bitcode_lib("nvshmem_device", abs_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + with pytest.raises(CompatibilityInsufficientMetadataError, match="packaged_with='ctk'"): + guard_rails.find_bitcode_lib("nvshmem_device") + + +def 
test_driver_libs_do_not_lock_ctk_anchor(monkeypatch, tmp_path): + driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") + ctk_lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + _patch_dynamic_lib_loader( + monkeypatch, + nvml=_loaded_dl(driver_lib_path, found_via="system-search"), + nvrtc=_loaded_dl(ctk_lib_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + driver_loaded = guard_rails.load_nvidia_dynamic_lib("nvml") + ctk_loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert driver_loaded.abs_path == driver_lib_path + assert ctk_loaded.abs_path == ctk_lib_path + + +def test_driver_libs_do_not_mask_later_ctk_mismatch(monkeypatch, tmp_path): + driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") + lib_root = tmp_path / "cuda-12.8" + hdr_root = tmp_path / "cuda-12.9" + lib_path = _touch_ctk_file(lib_root, "12.8.20250303", "targets/x86_64-linux/lib/libnvrtc.so.12") + hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" + _touch_ctk_file(hdr_root, "12.9.20250531", "targets/x86_64-linux/include/nvrtc.h") + + _patch_dynamic_lib_loader( + monkeypatch, + nvml=_loaded_dl(driver_lib_path, found_via="system-search"), + nvrtc=_loaded_dl(lib_path), + ) + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: LocatedHeaderDir(abs_path=str(hdr_dir), found_via="CUDA_PATH"), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + guard_rails.load_nvidia_dynamic_lib("nvml") + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + with pytest.raises(CompatibilityCheckError, match=r"companion tag 'api_nvrtc'"): + guard_rails.find_nvidia_header_directory("nvrtc") + + +@pytest.mark.parametrize( + "requirement", + ( + "nvidia-nvjitlink == 13.2.78.*; extra == 'nvjitlink'", + "nvidia-nvjitlink<14,>=13.2.78; extra == 'nvjitlink'", + ), +) 
+def test_wheel_metadata_accepts_exact_and_range_requirements(monkeypatch, tmp_path, requirement): + site_packages = tmp_path / "site-packages" + lib_path = _touch(site_packages / "nvidia" / "cu13" / "lib" / "libnvJitLink.so.13") + owner_dist = _FakeDistribution( + name="nvidia-nvjitlink", + version="13.2.78", + root=site_packages, + files=("nvidia/cu13/lib/libnvJitLink.so.13",), + ) + cuda_toolkit_dist = _FakeDistribution( + name="cuda-toolkit", + version="13.2.1", + root=site_packages, + requires=(requirement,), + ) + + compatibility_module._owned_distribution_candidates.cache_clear() + compatibility_module._cuda_toolkit_requirement_maps.cache_clear() + try: + monkeypatch.setattr( + compatibility_module.importlib.metadata, + "distributions", + lambda: (owner_dist, cuda_toolkit_dist), + ) + + metadata = compatibility_module._wheel_metadata_for_abs_path(lib_path) + finally: + compatibility_module._owned_distribution_candidates.cache_clear() + compatibility_module._cuda_toolkit_requirement_maps.cache_clear() + + assert metadata is not None + assert metadata.ctk_version.major == 13 + assert metadata.ctk_version.minor == 2 + assert metadata.source == "wheel metadata via nvidia-nvjitlink==13.2.78 pinned by cuda-toolkit==13.2.1" + + +def test_ctk_version_constraint_accepts_pep440_string(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + ctk_version=">=12.9,<13", + driver_cuda_version=_driver_cuda_version(13000), + ) + + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + + +def test_ctk_version_constraint_accepts_specifier_set_instance(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + 
monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + ctk_version=SpecifierSet(">=12.9,<13"), + driver_cuda_version=_driver_cuda_version(13000), + ) + + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + + +def test_ctk_version_constraint_failure_raises(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + ctk_version="<12.9", + driver_cuda_version=_driver_cuda_version(13000), + ) + + with pytest.raises(CompatibilityCheckError, match="ctk_version<12.9"): + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + +def test_ctk_version_constraint_rejects_invalid_specifier(): + with pytest.raises(ValueError, match="PEP 440 specifier"): + CompatibilityGuardRails(ctk_version="13.2") + + +def test_resolved_items_capture_relation_metadata(tmp_path): + ctk_root = _make_ctk_root(tmp_path / "cuda-12.9", "12.9.20250531") + + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + header_dir = ctk_root / "targets" / "x86_64-linux" / "include" + _touch(header_dir / "fatbinary_section.h") + static_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libcudadevrt.a") + bitcode_path = _touch(ctk_root / "nvvm" / "libdevice" / "libdevice.10.bc") + binary_path = _touch(ctk_root / "bin" / "nvcc") + + dynamic_item = compatibility_module._resolve_dynamic_lib_item("nvrtc", _loaded_dl(lib_path)) + header_item = compatibility_module._resolve_header_item( + "nvcc", + LocatedHeaderDir(abs_path=str(header_dir), found_via="CUDA_PATH"), + ) + static_item = compatibility_module._resolve_static_lib_item(_located_static_lib("cudadevrt", static_path)) + bitcode_item = 
compatibility_module._resolve_bitcode_lib_item(_located_bitcode_lib("device", bitcode_path)) + binary_item = compatibility_module._resolve_binary_item("nvcc", binary_path) + + assert dynamic_item.dynamic_link_component == "nvrtc_mathdx" + assert dynamic_item.ctk_companion_tags == ("api_nvrtc",) + assert header_item.dynamic_link_component is None + assert header_item.ctk_companion_tags == ("toolchain_cuda_nvcc",) + assert static_item.ctk_companion_tags == ("toolchain_cuda_nvcc",) + assert bitcode_item.ctk_companion_tags == ("toolchain_cuda_nvcc",) + assert binary_item.ctk_companion_tags == ("toolchain_cuda_nvcc",) + assert dynamic_item.ctk_version == header_item.ctk_version == static_item.ctk_version == bitcode_item.ctk_version + assert binary_item.ctk_version == dynamic_item.ctk_version + + +@pytest.mark.parametrize("name", SUPPORTED_BITCODE_LIBS) +def test_resolve_bitcode_lib_item_covers_every_supported_name(tmp_path, name): + abs_path = _touch(tmp_path / "site-packages" / f"{name}.bc") + item = compatibility_module._resolve_bitcode_lib_item(_located_bitcode_lib(name, abs_path)) + assert item.name == name + assert item.kind == "bitcode-lib" + assert item.packaged_with in ("ctk", "other") + + +@pytest.mark.parametrize("name", SUPPORTED_STATIC_LIBS) +def test_resolve_static_lib_item_covers_every_supported_name(tmp_path, name): + abs_path = _touch(tmp_path / "site-packages" / f"{name}.a") + item = compatibility_module._resolve_static_lib_item(_located_static_lib(name, abs_path)) + assert item.name == name + assert item.kind == "static-lib" + assert item.packaged_with in ("ctk", "other") + + +@pytest.mark.parametrize("utility_name", SUPPORTED_BINARIES_ALL) +def test_resolve_binary_item_covers_every_supported_name(tmp_path, utility_name): + abs_path = _touch(tmp_path / "bin" / utility_name) + item = compatibility_module._resolve_binary_item(utility_name, abs_path) + assert item.name == utility_name + assert item.kind == "binary" + expected_packaged_with = "other" if 
utility_name in {"nsys", "nsight-sys", "ncu", "nsight-compute"} else "ctk" + assert item.packaged_with == expected_packaged_with + + +def test_static_bitcode_and_binary_methods_participate_in_checks(monkeypatch, tmp_path): + ctk_root = _make_ctk_root(tmp_path / "cuda-12.9", "12.9.20250531") + + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + static_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libcudadevrt.a") + bitcode_path = _touch(ctk_root / "nvvm" / "libdevice" / "libdevice.10.bc") + binary_path = _touch(ctk_root / "bin" / "nvcc") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + compatibility_module, + "_locate_static_lib", + lambda _name: _located_static_lib("cudadevrt", static_path), + ) + monkeypatch.setattr( + compatibility_module, + "_locate_bitcode_lib", + lambda _name: _located_bitcode_lib("device", bitcode_path), + ) + monkeypatch.setattr( + compatibility_module, + "_find_nvidia_binary_utility", + lambda _utility_name: binary_path, + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + guard_rails.load_nvidia_dynamic_lib("nvrtc") + assert guard_rails.find_static_lib("cudadevrt") == static_path + assert guard_rails.find_bitcode_lib("device") == bitcode_path + assert guard_rails.find_nvidia_binary_utility("nvcc") == binary_path + + +def test_guard_rails_query_driver_cuda_version_by_default(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + query_calls: list[int] = [] + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + def fake_query_driver_cuda_version() -> DriverCudaVersion: + query_calls.append(1) + return _driver_cuda_version(13000) + + monkeypatch.setattr(compatibility_module, "query_driver_cuda_version", 
fake_query_driver_cuda_version) + monkeypatch.setattr( + compatibility_module, + "query_driver_release_version", + lambda: pytest.fail("backward-compatible driver should not need display-driver release metadata"), + ) + + guard_rails = CompatibilityGuardRails() + + guard_rails.load_nvidia_dynamic_lib("nvrtc") + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert len(query_calls) == 1 + + +def test_guard_rails_wrap_driver_query_failures(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + def fail_query_driver_cuda_version() -> DriverCudaVersion: + raise QueryDriverCudaVersionError("driver query failed") + + monkeypatch.setattr(compatibility_module, "query_driver_cuda_version", fail_query_driver_cuda_version) + + guard_rails = CompatibilityGuardRails() + + with pytest.raises( + CompatibilityCheckError, + match="Failed to query the CUDA driver version needed for compatibility checks", + ) as exc_info: + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert isinstance(exc_info.value.__cause__, QueryDriverCudaVersionError) + + +def test_guard_rails_accept_minor_version_compatibility_with_driver_release_branch(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + driver_cuda_version=_driver_cuda_version(12000), + driver_release_version=_driver_release_version("525.60.13"), + ) + + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + + +def test_guard_rails_reject_same_major_older_driver_when_release_branch_too_old(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", 
"12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + driver_cuda_version=_driver_cuda_version(12000), + driver_release_version=_driver_release_version("520.30.01"), + ) + + with pytest.raises( + CompatibilityCheckError, + match=r"branch 520\) is below NVIDIA's published CUDA 12\.x minimum branch >= 525", + ): + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + +def test_guard_rails_require_driver_release_metadata_for_same_major_older_driver(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + def fail_query_driver_release_version() -> DriverReleaseVersion: + raise QueryDriverReleaseVersionError("release query failed") + + monkeypatch.setattr( + compatibility_module, + "query_driver_release_version", + fail_query_driver_release_version, + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(12000)) + + with pytest.raises( + CompatibilityInsufficientMetadataError, + match="Failed to query the display-driver release version needed for compatibility checks", + ) as exc_info: + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert isinstance(exc_info.value.__cause__, QueryDriverReleaseVersionError) + + +def test_find_nvidia_header_directory_returns_none_when_unresolved(monkeypatch): + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: None, + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + assert guard_rails.find_nvidia_header_directory("nvrtc") is None + + +def test_register_and_check_is_idempotent_for_repeated_items(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / 
"cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + item = compatibility_module._resolve_dynamic_lib_item("nvrtc", _loaded_dl(lib_path)) + + guard_rails._register_and_check(item) + guard_rails._register_and_check(item) + guard_rails._register_and_check(item) + + matching = [resolved for resolved in guard_rails._resolved_items if resolved == item] + assert len(matching) == 1 + + +def test_driver_ctk_compatibility_error_is_typed_catchable(monkeypatch, tmp_path): + lib_path = _touch_ctk_file(tmp_path / "cuda-12.9", "12.9.20250531", "targets/x86_64-linux/lib/libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + driver_cuda_version=_driver_cuda_version(12000), + driver_release_version=_driver_release_version("520.30.01"), + ) + + with pytest.raises(DriverCtkCompatibilityError) as exc_info: + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert isinstance(exc_info.value, CompatibilityCheckError) + assert "OS handle remains live" in str(exc_info.value) diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails_public.py b/cuda_pathfinder/tests/test_compatibility_guard_rails_public.py new file mode 100644 index 00000000000..02039edbbd8 --- /dev/null +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails_public.py @@ -0,0 +1,405 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest +from compatibility_guard_rails_test_utils import ( + COMPATIBILITY_GUARD_RAILS_ENV_VAR, + DRIVER_COMPATIBILITY_ENV_VAR, + _default_process_wide_guard_rails_mode, # noqa: F401 + _DelegatingProcessWideGuardRails, + _driver_cuda_version, + _loaded_dl, + _located_bitcode_lib, + _located_static_lib, + _touch, + _touch_ctk_file, + compatibility_module, + process_wide_module, +) + +import cuda.pathfinder._static_libs.find_bitcode_lib as find_bitcode_lib_module +import cuda.pathfinder._static_libs.find_static_lib as find_static_lib_module +from cuda import pathfinder +from cuda.pathfinder import ( + CompatibilityCheckError, + CompatibilityGuardRails, + CompatibilityInsufficientMetadataError, + LoadedDL, + LocatedHeaderDir, + process_wide_compatibility_guard_rails, +) +from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home + + +def test_process_wide_compatibility_guard_rails_is_public_singleton(): + assert process_wide_compatibility_guard_rails is pathfinder.process_wide_compatibility_guard_rails + assert isinstance(process_wide_compatibility_guard_rails, CompatibilityGuardRails) + + +@pytest.mark.parametrize( + ("public_api_name", "guard_rails_method_name", "args", "return_value"), + [ + ( + "load_nvidia_dynamic_lib", + "load_nvidia_dynamic_lib", + ("nvrtc",), + _loaded_dl("/opt/mock/libnvrtc.so.12"), + ), + ( + "locate_nvidia_header_directory", + "locate_nvidia_header_directory", + ("nvrtc",), + LocatedHeaderDir(abs_path="/opt/mock/include", found_via="CUDA_PATH"), + ), + ("find_nvidia_header_directory", "find_nvidia_header_directory", ("nvrtc",), "/opt/mock/include"), + ( + "locate_static_lib", + "locate_static_lib", + ("cudadevrt",), + _located_static_lib("cudadevrt", "/opt/mock/libcudadevrt.a"), + ), + ("find_static_lib", "find_static_lib", ("cudadevrt",), "/opt/mock/libcudadevrt.a"), + ( + "locate_bitcode_lib", + 
"locate_bitcode_lib", + ("device",), + _located_bitcode_lib("device", "/opt/mock/libdevice.10.bc"), + ), + ("find_bitcode_lib", "find_bitcode_lib", ("device",), "/opt/mock/libdevice.10.bc"), + ("find_nvidia_binary_utility", "find_nvidia_binary_utility", ("nvcc",), "/opt/mock/nvcc"), + ], +) +def test_public_apis_route_through_process_wide_guard_rails( + monkeypatch, public_api_name, guard_rails_method_name, args, return_value +): + fake_guard_rails = _DelegatingProcessWideGuardRails(guard_rails_method_name, return_value) + monkeypatch.setattr(pathfinder, "process_wide_compatibility_guard_rails", fake_guard_rails) + + result = getattr(pathfinder, public_api_name)(*args) + + assert result == return_value + assert fake_guard_rails.calls == [(guard_rails_method_name, args)] + + +def test_public_driver_libs_are_allowed_in_strict_mode(monkeypatch, tmp_path): + driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") + + monkeypatch.setattr( + compatibility_module, + "_load_nvidia_dynamic_lib", + lambda _libname: _loaded_dl(driver_lib_path, found_via="system-search"), + ) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + def fail_raw_fallback(_libname: str) -> LoadedDL: + pytest.fail("strict mode must not fall back to raw loading") + + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", fail_raw_fallback) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvml") + + assert loaded.abs_path == driver_lib_path + + +@pytest.mark.parametrize("env_value", [None, ""]) +def test_public_apis_default_mode_applies_when_env_var_is_unset_or_empty(monkeypatch, tmp_path, env_value): + guarded_lib_path = _touch(tmp_path / "no-cuda-h" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.12", found_via="system-search") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", 
lambda _libname: _loaded_dl(guarded_lib_path)) + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", lambda _libname: raw_loaded) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + if env_value is None: + monkeypatch.delenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, raising=False) + else: + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, env_value) + + default_mode = process_wide_module._COMPATIBILITY_GUARD_RAILS_DEFAULT_MODE + if default_mode == "strict": + with pytest.raises(CompatibilityInsufficientMetadataError, match="cuda.h"): + pathfinder.load_nvidia_dynamic_lib("nvrtc") + return + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + assert loaded is raw_loaded + + +def test_public_apis_best_effort_fall_back_on_insufficient_metadata(monkeypatch, tmp_path): + guarded_lib_path = _touch(tmp_path / "no-cuda-h" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.12", found_via="system-search") + + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "best_effort") + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(guarded_lib_path)) + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", lambda _libname: raw_loaded) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded is raw_loaded + + +def test_public_apis_off_bypass_process_wide_guard_rails(monkeypatch): + raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.12", found_via="system-search") + fake_guard_rails = _DelegatingProcessWideGuardRails( + "load_nvidia_dynamic_lib", + _loaded_dl("/opt/mock/guard-rails/libnvrtc.so.12"), + ) + + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, 
"off") + monkeypatch.setattr(pathfinder, "process_wide_compatibility_guard_rails", fake_guard_rails) + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", lambda _libname: raw_loaded) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded is raw_loaded + assert fake_guard_rails.calls == [] + + +def test_public_apis_reject_invalid_guard_rails_mode(monkeypatch): + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "unexpected") + + with pytest.raises(RuntimeError, match=COMPATIBILITY_GUARD_RAILS_ENV_VAR) as exc_info: + pathfinder.find_nvidia_binary_utility("nvcc") + + message = str(exc_info.value) + assert "'off'" in message + assert "'best_effort'" in message + assert "'strict'" in message + assert f"defaults to {process_wide_module._COMPATIBILITY_GUARD_RAILS_DEFAULT_MODE!r}" in message + + +def test_public_apis_reject_invalid_driver_compatibility_mode(monkeypatch): + monkeypatch.setenv(DRIVER_COMPATIBILITY_ENV_VAR, "unexpected") + + with pytest.raises(RuntimeError, match=DRIVER_COMPATIBILITY_ENV_VAR) as exc_info: + pathfinder.find_nvidia_binary_utility("nvcc") + + message = str(exc_info.value) + assert "'default'" in message + assert "'assume_forward_compatibility'" in message + assert f"defaults to {process_wide_module._DRIVER_COMPATIBILITY_DEFAULT_MODE!r}" in message + + +def test_driver_compatibility_override_is_linux_only(monkeypatch): + monkeypatch.setenv(DRIVER_COMPATIBILITY_ENV_VAR, "assume_forward_compatibility") + monkeypatch.setattr(process_wide_module.sys, "platform", "win32") + + with pytest.raises(RuntimeError, match="only supported on Linux"): + pathfinder.find_nvidia_binary_utility("nvcc") + + +def test_driver_compatibility_override_is_not_validated_when_guard_rails_off(monkeypatch): + raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.12", found_via="system-search") + + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "off") + monkeypatch.setenv(DRIVER_COMPATIBILITY_ENV_VAR, "assume_forward_compatibility") + 
monkeypatch.setattr(process_wide_module.sys, "platform", "win32") + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", lambda _libname: raw_loaded) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded is raw_loaded + + +@pytest.mark.skipif( + not sys.platform.startswith("linux"), + reason="driver forward-compatibility override is Linux-only", +) +def test_public_driver_mismatch_advertises_forward_compatibility_override(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-13.0" + lib_path = _touch_ctk_file(ctk_root, "13.0.20251003", "targets/x86_64-linux/lib/libnvrtc.so.13") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(12080)), + ) + + def fail_raw_fallback(_libname: str) -> LoadedDL: + pytest.fail("driver mismatch should not fall back without explicit override") + + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", fail_raw_fallback) + + with pytest.raises(CompatibilityCheckError, match="driver_major >= ctk_major") as exc_info: + pathfinder.load_nvidia_dynamic_lib("nvrtc") + + message = str(exc_info.value) + assert DRIVER_COMPATIBILITY_ENV_VAR in message + assert "assume_forward_compatibility" in message + assert "does not relax CTK-coherence checks" in message + + +@pytest.mark.skipif( + not sys.platform.startswith("linux"), + reason="driver forward-compatibility override is Linux-only", +) +def test_public_driver_mismatch_falls_back_when_assuming_forward_compatibility(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-13.0" + guarded_lib_path = _touch_ctk_file(ctk_root, "13.0.20251003", "targets/x86_64-linux/lib/libnvrtc.so.13") + raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.13", found_via="system-search") + + monkeypatch.setenv(DRIVER_COMPATIBILITY_ENV_VAR, 
"assume_forward_compatibility") + monkeypatch.setattr( + compatibility_module, + "_load_nvidia_dynamic_lib", + lambda _libname: _loaded_dl(guarded_lib_path), + ) + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", lambda _libname: raw_loaded) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(12080)), + ) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded is raw_loaded + + +@pytest.mark.skipif( + not sys.platform.startswith("linux"), + reason="driver forward-compatibility override is Linux-only", +) +def test_forward_compatibility_override_does_not_relax_ctk_coherence_checks(monkeypatch, tmp_path): + lib_root = tmp_path / "cuda-12.8" + hdr_root = tmp_path / "cuda-12.9" + lib_path = _touch_ctk_file(lib_root, "12.8.20250303", "targets/x86_64-linux/lib/libnvrtc.so.12") + hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" + _touch_ctk_file(hdr_root, "12.9.20250531", "targets/x86_64-linux/include/nvrtc.h") + + monkeypatch.setenv(DRIVER_COMPATIBILITY_ENV_VAR, "assume_forward_compatibility") + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: LocatedHeaderDir(abs_path=str(hdr_dir), found_via="CUDA_PATH"), + ) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + with pytest.raises(CompatibilityCheckError, match=r"companion tag 'api_nvrtc'"): + pathfinder.find_nvidia_header_directory("nvrtc") + + +def test_public_apis_share_process_wide_guard_rails_state(monkeypatch, tmp_path): + lib_root = tmp_path / "cuda-12.8" + hdr_root = tmp_path / "cuda-12.9" + lib_path = 
_touch_ctk_file(lib_root, "12.8.20250303", "targets/x86_64-linux/lib/libnvrtc.so.12") + hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" + _touch_ctk_file(hdr_root, "12.9.20250531", "targets/x86_64-linux/include/nvrtc.h") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: LocatedHeaderDir(abs_path=str(hdr_dir), found_via="CUDA_PATH"), + ) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + with pytest.raises(CompatibilityCheckError, match=r"companion tag 'api_nvrtc'"): + pathfinder.find_nvidia_header_directory("nvrtc") + + +@pytest.mark.parametrize( + ("public_find_name", "public_locate_name", "raw_module", "finder_class_name", "name", "relative_path"), + [ + ( + "find_static_lib", + "locate_static_lib", + find_static_lib_module, + "_FindStaticLib", + "cudadevrt", + Path(find_static_lib_module._SUPPORTED_STATIC_LIBS_INFO["cudadevrt"]["ctk_rel_paths"][0]) + / find_static_lib_module._SUPPORTED_STATIC_LIBS_INFO["cudadevrt"]["filename"], + ), + ( + "find_bitcode_lib", + "locate_bitcode_lib", + find_bitcode_lib_module, + "_FindBitcodeLib", + "device", + Path(find_bitcode_lib_module._SUPPORTED_BITCODE_LIBS_INFO["device"]["rel_path"]) + / find_bitcode_lib_module._SUPPORTED_BITCODE_LIBS_INFO["device"]["filename"], + ), + ], +) +def test_public_strict_mode_static_and_bitcode_reuse_cached_locate_path( + monkeypatch, + tmp_path, + public_find_name, + public_locate_name, + raw_module, + finder_class_name, + name, + relative_path, +): + ctk_root = tmp_path / "cuda-12.9" + abs_path = _touch_ctk_file(ctk_root, "12.9.20250531", relative_path) + finder_class = getattr(raw_module, finder_class_name) + 
original_try_with_cuda_home = finder_class.try_with_cuda_home + try_with_cuda_home_calls: list[str] = [] + + def counting_try_with_cuda_home(self): + try_with_cuda_home_calls.append(self.name) + return original_try_with_cuda_home(self) + + monkeypatch.setattr(raw_module, "find_sub_dirs_all_sitepackages", lambda _sub_dir: []) + monkeypatch.setattr(finder_class, "try_with_cuda_home", counting_try_with_cuda_home) + monkeypatch.delenv("CONDA_PREFIX", raising=False) + monkeypatch.delenv("CUDA_PATH", raising=False) + monkeypatch.setenv("CUDA_HOME", str(ctk_root)) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + public_find = getattr(pathfinder, public_find_name) + public_locate = getattr(pathfinder, public_locate_name) + + public_locate.cache_clear() + get_cuda_path_or_home.cache_clear() + try: + assert public_find(name) == abs_path + assert public_find(name) == abs_path + assert try_with_cuda_home_calls == [name] + + public_find.cache_clear() + assert public_locate(name).abs_path == abs_path + assert try_with_cuda_home_calls == [name, name] + finally: + public_find.cache_clear() + public_locate.cache_clear() + get_cuda_path_or_home.cache_clear() diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails_real_host.py b/cuda_pathfinder/tests/test_compatibility_guard_rails_real_host.py new file mode 100644 index 00000000000..0a5b4f37d66 --- /dev/null +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails_real_host.py @@ -0,0 +1,114 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pytest +from compatibility_guard_rails_test_utils import ( + STRICTNESS, + _assert_real_ctk_backed_path, + _default_process_wide_guard_rails_mode, # noqa: F401 + clear_real_host_probe_caches, # noqa: F401 +) +from local_helpers import ( + have_distribution, + require_real_cuda_toolkit_version_from_cuda_h, + require_real_driver_cuda_version, +) + +from cuda.pathfinder import ( + BitcodeLibNotFoundError, + CompatibilityCheckError, + CompatibilityGuardRails, + CompatibilityInsufficientMetadataError, + DynamicLibNotFoundError, + StaticLibNotFoundError, +) + + +@pytest.mark.usefixtures("clear_real_host_probe_caches") +def test_real_driver(info_summary_append): + real_driver = require_real_driver_cuda_version() + info_summary_append( + f"real driver CUDA version={real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" + ) + + +@pytest.mark.usefixtures("clear_real_host_probe_caches") +def test_real_ctk(info_summary_append): + real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + info_summary_append( + f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " + f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" + ) + + +@pytest.mark.usefixtures("clear_real_host_probe_caches") +def test_real_wheel_ctk_items_are_compatible(info_summary_append): + real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + real_driver = require_real_driver_cuda_version() + guard_rails = CompatibilityGuardRails( + ctk_version=f"=={real_ctk.version.major}.{real_ctk.version.minor}", + driver_cuda_version=real_driver, + ) + + try: + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + header_dir = guard_rails.find_nvidia_header_directory("nvrtc") + static_lib = guard_rails.find_static_lib("cudadevrt") + bitcode_lib = guard_rails.find_bitcode_lib("device") + nvcc = guard_rails.find_nvidia_binary_utility("nvcc") + except ( + CompatibilityCheckError, + 
CompatibilityInsufficientMetadataError, + DynamicLibNotFoundError, + StaticLibNotFoundError, + BitcodeLibNotFoundError, + ) as exc: + if STRICTNESS == "all_must_work": + raise + pytest.skip(f"real CTK check unavailable: {exc.__class__.__name__}: {exc}") + + assert isinstance(loaded.abs_path, str) + assert header_dir is not None + for path in (loaded.abs_path, header_dir, static_lib, bitcode_lib): + _assert_real_ctk_backed_path(path) + if have_distribution(r"^nvidia-cuda-nvcc-cu12$"): + # For CUDA 12, NVIDIA publishes a PyPI package named nvidia-cuda-nvcc-cu12, + # but the wheels only contain nvcc-adjacent compiler components such as + # ptxas, CRT headers, libnvvm, and libdevice; the nvcc executable itself + # is not included. + if nvcc is not None: + # nvcc found elsewhere, e.g. /usr/local or Conda. + _assert_real_ctk_backed_path(nvcc) + else: + if nvcc is None: + if STRICTNESS == "all_must_work": + raise AssertionError("Expected CTK-backed nvcc to be discoverable.") + info_summary_append("real CTK-backed nvcc executable not found; continuing without asserting nvcc") + else: + _assert_real_ctk_backed_path(nvcc) + + +@pytest.mark.usefixtures("clear_real_host_probe_caches") +def test_real_wheel_component_version_does_not_override_ctk_line(info_summary_append): + real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + real_driver = require_real_driver_cuda_version() + guard_rails = CompatibilityGuardRails( + ctk_version=f"=={real_ctk.version.major}.{real_ctk.version.minor}", + driver_cuda_version=real_driver, + ) + + try: + header_dir = guard_rails.find_nvidia_header_directory("cufft") + except (CompatibilityCheckError, CompatibilityInsufficientMetadataError) as exc: + if STRICTNESS == "all_must_work": + raise + pytest.skip(f"real cufft CTK check unavailable: {exc.__class__.__name__}: {exc}") + + if header_dir is None: + if STRICTNESS == "all_must_work": + raise AssertionError("Expected CTK-backed cufft headers to be discoverable.") + pytest.skip("real cufft CTK 
check unavailable: cufft headers not found") + + _assert_real_ctk_backed_path(header_dir) diff --git a/cuda_pathfinder/tests/test_descriptor_catalog.py b/cuda_pathfinder/tests/test_descriptor_catalog.py index b2c8eece4bb..8fe603adc42 100644 --- a/cuda_pathfinder/tests/test_descriptor_catalog.py +++ b/cuda_pathfinder/tests/test_descriptor_catalog.py @@ -17,6 +17,7 @@ _VALID_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") _VALID_PACKAGED_WITH_VALUES = {"ctk", "other", "driver"} +_VALID_RELATION_NAME_RE = re.compile(r"^[a-z][a-z0-9_]*$") _CATALOG_BY_NAME = {spec.name: spec for spec in DESCRIPTOR_CATALOG} @@ -96,3 +97,75 @@ def test_ctk_root_canary_anchors_reference_known_ctk_libs(spec: DescriptorSpec): def test_only_ctk_libs_define_ctk_root_canary_anchors(spec: DescriptorSpec): if spec.ctk_root_canary_anchor_libnames: assert spec.packaged_with == "ctk", f"{spec.name} defines canary anchors but is not a CTK lib" + + +@pytest.mark.parametrize("spec", DESCRIPTOR_CATALOG, ids=lambda s: s.name) +def test_dynamic_link_component_names_are_valid(spec: DescriptorSpec): + if spec.dynamic_link_component is not None: + assert _VALID_RELATION_NAME_RE.match(spec.dynamic_link_component) + + +@pytest.mark.parametrize("spec", DESCRIPTOR_CATALOG, ids=lambda s: s.name) +def test_dynamic_link_components_are_not_assigned_to_driver_libs(spec: DescriptorSpec): + if spec.dynamic_link_component is not None: + assert spec.packaged_with != "driver" + + +@pytest.mark.parametrize("spec", DESCRIPTOR_CATALOG, ids=lambda s: s.name) +def test_ctk_companion_tags_are_unique_and_valid(spec: DescriptorSpec): + assert len(spec.ctk_companion_tags) == len(set(spec.ctk_companion_tags)) + for tag in spec.ctk_companion_tags: + assert _VALID_RELATION_NAME_RE.match(tag) + + +def test_dynamic_link_components_encode_authored_groups(): + grouped: dict[str, set[str]] = {} + for spec in DESCRIPTOR_CATALOG: + if spec.dynamic_link_component is None: + continue + grouped.setdefault(spec.dynamic_link_component, 
set()).add(spec.name) + + assert grouped == { + "cuda_blas_solver_runtime": { + "cublas", + "cublasLt", + "cublasmp", + "cudart", + "cudss", + "cusolver", + "cusolverMg", + "cusolverMp", + "cusparse", + "cutensor", + "cutensorMg", + "nccl", + "nvJitLink", + "nvblas", + }, + "cufft_nvshmem": { + "cufft", + "cufftMp", + "cufftw", + "nvshmem_host", + }, + "cufile": { + "cufile", + }, + "npp": { + "nppc", + "nppial", + "nppicc", + "nppidei", + "nppif", + "nppig", + "nppim", + "nppist", + "nppisu", + "nppitc", + "npps", + }, + "nvrtc_mathdx": { + "mathdx", + "nvrtc", + }, + } diff --git a/cuda_pathfinder/tests/test_driver_lib_loading.py b/cuda_pathfinder/tests/test_driver_lib_loading.py index b97453c9b5a..b94f4010c3e 100644 --- a/cuda_pathfinder/tests/test_driver_lib_loading.py +++ b/cuda_pathfinder/tests/test_driver_lib_loading.py @@ -37,6 +37,8 @@ _CUDA_DESC = LIB_DESCRIPTORS["cuda"] _NVML_DESC = LIB_DESCRIPTORS["nvml"] +pytestmark = pytest.mark.usefixtures("disable_process_wide_compatibility_guard_rails") + def _make_loaded_dl(path, found_via): return LoadedDL(path, False, 0xDEAD, found_via) @@ -163,8 +165,9 @@ def raise_child_process_failed(): def test_real_query_driver_cuda_version(info_summary_append): driver_info._load_nvidia_dynamic_lib.cache_clear() driver_info.query_driver_cuda_version.cache_clear() + driver_info.query_driver_release_version.cache_clear() try: - version = driver_info.query_driver_cuda_version() + driver_cuda_version = driver_info.query_driver_cuda_version() except driver_info.QueryDriverCudaVersionError as exc: if STRICTNESS == "all_must_work": raise @@ -173,8 +176,35 @@ def test_real_query_driver_cuda_version(info_summary_append): finally: driver_info._load_nvidia_dynamic_lib.cache_clear() driver_info.query_driver_cuda_version.cache_clear() + driver_info.query_driver_release_version.cache_clear() + + info_summary_append( + "driver_cuda_version=" + f"{driver_cuda_version.major}.{driver_cuda_version.minor} " + 
f"(encoded={driver_cuda_version.encoded})" + ) + assert driver_cuda_version.encoded > 0 + assert driver_cuda_version.major == driver_cuda_version.encoded // 1000 + assert driver_cuda_version.minor == (driver_cuda_version.encoded % 1000) // 10 + - info_summary_append(f"driver_version={version.major}.{version.minor} (encoded={version.encoded})") - assert version.encoded > 0 - assert version.major == version.encoded // 1000 - assert version.minor == (version.encoded % 1000) // 10 +def test_real_query_driver_release_version(info_summary_append): + driver_info._load_nvidia_dynamic_lib.cache_clear() + driver_info.query_driver_release_version.cache_clear() + try: + driver_release_version = driver_info.query_driver_release_version() + except driver_info.QueryDriverReleaseVersionError as exc: + if STRICTNESS == "all_must_work": + raise + info_summary_append(f"driver release unavailable: {exc.__class__.__name__}: {exc}") + return + finally: + driver_info._load_nvidia_dynamic_lib.cache_clear() + driver_info.query_driver_release_version.cache_clear() + + info_summary_append( + f"driver_release_version={driver_release_version.text} " + f"(branch={driver_release_version.branch}, components={driver_release_version.components})" + ) + assert driver_release_version.branch >= 400 + assert driver_release_version.components[0] == driver_release_version.branch diff --git a/cuda_pathfinder/tests/test_find_bitcode_lib.py b/cuda_pathfinder/tests/test_find_bitcode_lib.py index 659b068f0ff..9ba2d3e4158 100644 --- a/cuda_pathfinder/tests/test_find_bitcode_lib.py +++ b/cuda_pathfinder/tests/test_find_bitcode_lib.py @@ -29,10 +29,10 @@ def _bitcode_lib_filename(libname: str) -> str: @pytest.fixture def clear_find_bitcode_lib_cache(): - find_bitcode_lib_module.find_bitcode_lib.cache_clear() + find_bitcode_lib_module.locate_bitcode_lib.cache_clear() get_cuda_path_or_home.cache_clear() yield - find_bitcode_lib_module.find_bitcode_lib.cache_clear() + 
find_bitcode_lib_module.locate_bitcode_lib.cache_clear() get_cuda_path_or_home.cache_clear() @@ -124,11 +124,13 @@ def find_expected_sub_dir(sub_dir): assert located_lib.abs_path == site_packages_path assert located_lib.found_via == "site-packages" os.remove(site_packages_path) + find_bitcode_lib_module.locate_bitcode_lib.cache_clear() located_lib = locate_bitcode_lib(libname) assert located_lib.abs_path == conda_path assert located_lib.found_via == "conda" os.remove(conda_path) + find_bitcode_lib_module.locate_bitcode_lib.cache_clear() located_lib = locate_bitcode_lib(libname) assert located_lib.abs_path == cuda_home_path diff --git a/cuda_pathfinder/tests/test_find_nvidia_binaries.py b/cuda_pathfinder/tests/test_find_nvidia_binaries.py index ec9740cd853..ec373cffaa2 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_binaries.py +++ b/cuda_pathfinder/tests/test_find_nvidia_binaries.py @@ -14,6 +14,8 @@ SUPPORTED_BINARIES_ALL, ) +pytestmark = pytest.mark.usefixtures("disable_process_wide_compatibility_guard_rails") + def test_unknown_utility_name(): with pytest.raises(UnsupportedBinaryError, match=r"'unknown-utility' is not supported"): @@ -153,38 +155,3 @@ def test_find_binary_without_site_packages_entry(monkeypatch, mocker): os.path.join(cuda_home, "bin"), ] which_mock.assert_called_once_with("nvcc", path=os.pathsep.join(expected_dirs)) - - -@pytest.mark.usefixtures("clear_find_binary_cache") -def test_find_binary_cache_negative_result(monkeypatch, mocker): - mocker.patch.object(binary_finder_module, "IS_WINDOWS", new=False) - mocker.patch.object(binary_finder_module.supported_nvidia_binaries, "SITE_PACKAGES_BINDIRS", {}) - mocker.patch.object(binary_finder_module, "find_sub_dirs_all_sitepackages", return_value=[]) - monkeypatch.delenv("CONDA_PREFIX", raising=False) - mocker.patch.object(binary_finder_module, "get_cuda_path_or_home", return_value=None) - which_mock = mocker.patch.object(binary_finder_module.shutil, "which", return_value=None) - - first = 
find_nvidia_binary_utility("nvcc") - second = find_nvidia_binary_utility("nvcc") - - assert first is None - assert second is None - which_mock.assert_called_once_with("nvcc", path="") - - -@pytest.mark.usefixtures("clear_find_binary_cache") -def test_caching_per_utility(): - """Verify that different utilities have independent cache entries.""" - nvdisasm1 = find_nvidia_binary_utility("nvdisasm") - nvcc1 = find_nvidia_binary_utility("nvcc") - nvdisasm2 = find_nvidia_binary_utility("nvdisasm") - nvcc2 = find_nvidia_binary_utility("nvcc") - - # Same utility should return cached result - assert nvdisasm1 is nvdisasm2 - assert nvcc1 is nvcc2 - - # Different utilities should have different results (unless at least one of - # them is None) - if nvdisasm1 is not None and nvcc1 is not None: - assert nvdisasm1 != nvcc1 diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py index e28f64d3520..5a1a7f46291 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_headers.py +++ b/cuda_pathfinder/tests/test_find_nvidia_headers.py @@ -40,6 +40,9 @@ STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") +pytestmark = pytest.mark.usefixtures("disable_process_wide_compatibility_guard_rails") + + NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES = { "cusolverMp": r"^nvidia-cusolvermp-.*$", "cusparseLt": r"^nvidia-cusparselt-.*$", diff --git a/cuda_pathfinder/tests/test_find_static_lib.py b/cuda_pathfinder/tests/test_find_static_lib.py index 2b30aa12011..be27349d03a 100644 --- a/cuda_pathfinder/tests/test_find_static_lib.py +++ b/cuda_pathfinder/tests/test_find_static_lib.py @@ -24,10 +24,10 @@ @pytest.fixture def clear_find_static_lib_cache(): - find_static_lib_module.find_static_lib.cache_clear() + find_static_lib_module.locate_static_lib.cache_clear() get_cuda_path_or_home.cache_clear() yield - 
find_static_lib_module.find_static_lib.cache_clear() + find_static_lib_module.locate_static_lib.cache_clear() get_cuda_path_or_home.cache_clear() @@ -78,7 +78,7 @@ def test_locate_static_lib(info_summary_append, libname): @pytest.mark.usefixtures("clear_find_static_lib_cache") def test_locate_static_lib_search_order(monkeypatch, tmp_path): filename = CUDADEVRT_INFO["filename"] - conda_rel_path = CUDADEVRT_INFO["conda_rel_path"] + conda_rel_path = CUDADEVRT_INFO["conda_rel_paths"][0] site_pkg_rel = CUDADEVRT_INFO["site_packages_dirs"][0] site_packages_lib_dir = tmp_path / "site-packages" / Path(site_pkg_rel.replace("/", os.sep)) @@ -106,17 +106,45 @@ def test_locate_static_lib_search_order(monkeypatch, tmp_path): assert located_lib.abs_path == site_packages_path assert located_lib.found_via == "site-packages" os.remove(site_packages_path) + find_static_lib_module.locate_static_lib.cache_clear() located_lib = locate_static_lib("cudadevrt") assert located_lib.abs_path == conda_path assert located_lib.found_via == "conda" os.remove(conda_path) + find_static_lib_module.locate_static_lib.cache_clear() located_lib = locate_static_lib("cudadevrt") assert located_lib.abs_path == cuda_home_path assert located_lib.found_via == "CUDA_PATH" +@pytest.mark.usefixtures("clear_find_static_lib_cache") +def test_locate_static_lib_conda_rel_path_fallback(monkeypatch, tmp_path): + filename = CUDADEVRT_INFO["filename"] + conda_rel_paths = CUDADEVRT_INFO["conda_rel_paths"] + if len(conda_rel_paths) == 1: + monkeypatch.setitem(CUDADEVRT_INFO, "conda_rel_paths", ("missing-first", conda_rel_paths[0])) + conda_rel_paths = CUDADEVRT_INFO["conda_rel_paths"] + + conda_prefix = tmp_path / "conda-prefix" + conda_lib_dir = _conda_anchor(conda_prefix) / Path(conda_rel_paths[1]) + conda_path = _make_static_lib_file(conda_lib_dir, filename) + + monkeypatch.setattr( + find_static_lib_module, + "find_sub_dirs_all_sitepackages", + lambda _sub_dir: [], + ) + monkeypatch.setenv("CONDA_PREFIX", 
str(conda_prefix)) + monkeypatch.delenv("CUDA_HOME", raising=False) + monkeypatch.delenv("CUDA_PATH", raising=False) + + located_lib = locate_static_lib("cudadevrt") + assert located_lib.abs_path == conda_path + assert located_lib.found_via == "conda" + + @pytest.mark.usefixtures("clear_find_static_lib_cache") def test_find_static_lib_not_found_error_includes_cuda_home_directory_listing(monkeypatch, tmp_path): filename = CUDADEVRT_INFO["filename"] diff --git a/cuda_pathfinder/tests/test_header_descriptor_catalog.py b/cuda_pathfinder/tests/test_header_descriptor_catalog.py new file mode 100644 index 00000000000..86f15004970 --- /dev/null +++ b/cuda_pathfinder/tests/test_header_descriptor_catalog.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import re + +import pytest + +from cuda.pathfinder._headers.header_descriptor_catalog import HEADER_DESCRIPTOR_CATALOG, HeaderDescriptorSpec + +_VALID_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") +_VALID_RELATION_NAME_RE = re.compile(r"^[a-z][a-z0-9_]*$") + + +def test_catalog_names_are_unique(): + names = [spec.name for spec in HEADER_DESCRIPTOR_CATALOG] + assert len(names) == len(set(names)) + + +@pytest.mark.parametrize("spec", HEADER_DESCRIPTOR_CATALOG, ids=lambda s: s.name) +def test_name_is_valid_identifier(spec: HeaderDescriptorSpec): + assert _VALID_NAME_RE.match(spec.name), f"{spec.name!r} is not a valid Python identifier" + + +@pytest.mark.parametrize("spec", HEADER_DESCRIPTOR_CATALOG, ids=lambda s: s.name) +def test_ctk_companion_tags_are_unique_and_valid(spec: HeaderDescriptorSpec): + assert len(spec.ctk_companion_tags) == len(set(spec.ctk_companion_tags)) + for tag in spec.ctk_companion_tags: + assert _VALID_RELATION_NAME_RE.match(tag) + + +def test_known_toolchain_headers_share_toolchain_tag(): + expected = {"cccl", "cudart", "cutlass", 
"cute", "nvcc", "nvfatbin", "nvvm"} + actual = {spec.name for spec in HEADER_DESCRIPTOR_CATALOG if "toolchain_cuda_nvcc" in spec.ctk_companion_tags} + assert actual == expected + + +def test_supported_runtime_headers_keep_companion_tags(): + expected = { + "cublas": ("api_cublas",), + "cudart": ("api_cudart", "toolchain_cuda_nvcc"), + "cufft": ("api_cufft",), + "curand": ("api_curand",), + "cusolver": ("api_cusolver",), + "cusparse": ("api_cusparse",), + "npp": ("api_npp",), + "nvjitlink": ("api_nvjitlink",), + "nvrtc": ("api_nvrtc",), + "nvvm": ("api_nvvm", "toolchain_cuda_nvcc"), + } + actual = {spec.name: spec.ctk_companion_tags for spec in HEADER_DESCRIPTOR_CATALOG if spec.name in expected} + assert actual == expected diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 401e7dc13f8..33183894f93 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -24,6 +24,8 @@ STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") +pytestmark = pytest.mark.usefixtures("disable_process_wide_compatibility_guard_rails") + def test_supported_libnames_linux_sonames_consistency(): assert tuple(sorted(supported_nvidia_libs.SUPPORTED_LIBNAMES_LINUX)) == tuple( diff --git a/cuda_pathfinder/tests/test_utils_driver_info.py b/cuda_pathfinder/tests/test_utils_driver_info.py index 21948dadafe..b74823cac47 100644 --- a/cuda_pathfinder/tests/test_utils_driver_info.py +++ b/cuda_pathfinder/tests/test_utils_driver_info.py @@ -12,8 +12,10 @@ @pytest.fixture(autouse=True) def _clear_driver_cuda_version_query_cache(): driver_info.query_driver_cuda_version.cache_clear() + driver_info.query_driver_release_version.cache_clear() yield driver_info.query_driver_cuda_version.cache_clear() + driver_info.query_driver_release_version.cache_clear() 
class _FakeCuDriverGetVersion: @@ -33,6 +35,47 @@ def __init__(self, *, status: int, version: int): self.cuDriverGetVersion = _FakeCuDriverGetVersion(status=status, version=version) +class _FakeNvmlFunction: + def __init__(self, func): + self.argtypes = None + self.restype = None + self._func = func + + def __call__(self, *args): + return self._func(*args) + + +class _FakeNvmlLib: + def __init__( + self, + *, + init_status: int = 0, + driver_release_version: str = "595.58.03", + query_status: int = 0, + shutdown_statuses: tuple[int, ...] = (0,), + ): + self.shutdown_calls = 0 + remaining_shutdown_statuses = list(shutdown_statuses) + + self.nvmlInit_v2 = _FakeNvmlFunction(lambda: init_status) + + def nvml_system_get_driver_version(version_buffer, _buffer_length) -> int: + if query_status != 0: + return query_status + version_buffer.value = driver_release_version.encode() + return 0 + + self.nvmlSystemGetDriverVersion = _FakeNvmlFunction(nvml_system_get_driver_version) + + def nvml_shutdown() -> int: + self.shutdown_calls += 1 + if remaining_shutdown_statuses: + return remaining_shutdown_statuses.pop(0) + return 0 + + self.nvmlShutdown = _FakeNvmlFunction(nvml_shutdown) + + def _loaded_cuda(abs_path: str) -> LoadedDL: return LoadedDL( abs_path=abs_path, @@ -42,6 +85,106 @@ def _loaded_cuda(abs_path: str) -> LoadedDL: ) +def _loaded_nvml(abs_path: str) -> LoadedDL: + return LoadedDL( + abs_path=abs_path, + was_already_loaded_from_elsewhere=False, + _handle_uint=0xCAFE, + found_via="system-search", + ) + + +def test_driver_release_version_from_text_parses_branch(): + assert driver_info.DriverReleaseVersion.from_text("595.58.03") == driver_info.DriverReleaseVersion( + text="595.58.03", + components=(595, 58, 3), + branch=595, + ) + + +def test_query_driver_release_version_wraps_internal_failures(monkeypatch): + root_cause = RuntimeError("low-level release query failed") + + def fail_query_driver_release_version_text() -> str: + raise root_cause + + 
monkeypatch.setattr(driver_info, "_query_driver_release_version_text", fail_query_driver_release_version_text) + + with pytest.raises( + driver_info.QueryDriverReleaseVersionError, + match="Failed to query the display-driver release version", + ) as exc_info: + driver_info.query_driver_release_version() + + assert exc_info.value.__cause__ is root_cause + + +def test_query_driver_release_version_text_uses_nvml(monkeypatch): + fake_nvml_lib = _FakeNvmlLib(driver_release_version="595.58.03") + loaded_paths: list[str] = [] + + monkeypatch.setattr( + driver_info, + "_load_nvidia_dynamic_lib", + lambda _libname: _loaded_nvml("/usr/lib/libnvidia-ml.so.1"), + ) + + def fake_cdll(abs_path: str): + loaded_paths.append(abs_path) + return fake_nvml_lib + + monkeypatch.setattr(driver_info.ctypes, "CDLL", fake_cdll) + + assert driver_info._query_driver_release_version_text() == "595.58.03" + assert loaded_paths == ["/usr/lib/libnvidia-ml.so.1"] + assert fake_nvml_lib.shutdown_calls == 1 + + +def test_query_driver_release_version_text_raises_when_nvml_call_fails(monkeypatch): + fake_nvml_lib = _FakeNvmlLib(query_status=1) + + monkeypatch.setattr( + driver_info, + "_load_nvidia_dynamic_lib", + lambda _libname: _loaded_nvml("/usr/lib/libnvidia-ml.so.1"), + ) + monkeypatch.setattr(driver_info.ctypes, "CDLL", lambda _abs_path: fake_nvml_lib) + + with pytest.raises(RuntimeError, match=r"nvmlSystemGetDriverVersion\(\) \(status=1\)"): + driver_info._query_driver_release_version_text() + assert fake_nvml_lib.shutdown_calls == 1 + + +def test_query_driver_release_version_text_raises_when_only_shutdown_fails(monkeypatch): + fake_nvml_lib = _FakeNvmlLib(shutdown_statuses=(2,)) + + monkeypatch.setattr( + driver_info, + "_load_nvidia_dynamic_lib", + lambda _libname: _loaded_nvml("/usr/lib/libnvidia-ml.so.1"), + ) + monkeypatch.setattr(driver_info.ctypes, "CDLL", lambda _abs_path: fake_nvml_lib) + + with pytest.raises(RuntimeError, match=r"nvmlShutdown\(\) \(status=2\)"): + 
driver_info._query_driver_release_version_text() + assert fake_nvml_lib.shutdown_calls == 1 + + +def test_query_driver_release_version_text_body_error_wins_when_both_fail(monkeypatch): + fake_nvml_lib = _FakeNvmlLib(query_status=1, shutdown_statuses=(2,)) + + monkeypatch.setattr( + driver_info, + "_load_nvidia_dynamic_lib", + lambda _libname: _loaded_nvml("/usr/lib/libnvidia-ml.so.1"), + ) + monkeypatch.setattr(driver_info.ctypes, "CDLL", lambda _abs_path: fake_nvml_lib) + + with pytest.raises(RuntimeError, match=r"nvmlSystemGetDriverVersion\(\) \(status=1\)"): + driver_info._query_driver_release_version_text() + assert fake_nvml_lib.shutdown_calls == 1 + + def test_query_driver_cuda_version_uses_windll_on_windows(monkeypatch): fake_driver_lib = _FakeDriverLib(status=0, version=12080) loaded_paths: list[str] = [] @@ -63,14 +206,15 @@ def fake_windll(abs_path: str): assert loaded_paths == [r"C:\Windows\System32\nvcuda.dll"] -def test_query_driver_cuda_version_returns_parsed_dataclass(monkeypatch): - monkeypatch.setattr(driver_info, "_query_driver_cuda_version_int", lambda: 12080) +def test_driver_cuda_version_from_encoded_returns_subclass_instance(): + version = driver_info.DriverCudaVersion.from_encoded(12080) - assert driver_info.query_driver_cuda_version() == driver_info.DriverCudaVersion( + assert version == driver_info.DriverCudaVersion( encoded=12080, major=12, minor=8, ) + assert type(version) is driver_info.DriverCudaVersion def test_query_driver_cuda_version_wraps_internal_failures(monkeypatch): diff --git a/cuda_pathfinder/tests/test_utils_toolkit_info.py b/cuda_pathfinder/tests/test_utils_toolkit_info.py new file mode 100644 index 00000000000..a62db6b9603 --- /dev/null +++ b/cuda_pathfinder/tests/test_utils_toolkit_info.py @@ -0,0 +1,140 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from cuda.pathfinder._utils import toolkit_info + + +@pytest.fixture(autouse=True) +def _clear_cuda_header_version_cache(): + toolkit_info.read_cuda_header_version.cache_clear() + yield + toolkit_info.read_cuda_header_version.cache_clear() + + +def test_encoded_cuda_version_from_encoded_decodes_major_minor(): + assert toolkit_info.EncodedCudaVersion.from_encoded(13020) == toolkit_info.EncodedCudaVersion( + encoded=13020, + major=13, + minor=2, + ) + + +def test_encoded_cuda_version_from_encoded_accepts_decimal_string(): + assert toolkit_info.EncodedCudaVersion.from_encoded("13020") == toolkit_info.EncodedCudaVersion( + encoded=13020, + major=13, + minor=2, + ) + + +def test_encoded_cuda_version_from_encoded_raises_helpful_error_for_invalid_string(): + with pytest.raises( + ValueError, + match=r"EncodedCudaVersion\.from_encoded\(\) expected an integer or decimal string, got '13\.2'", + ): + toolkit_info.EncodedCudaVersion.from_encoded("13.2") + + +@pytest.mark.parametrize("encoded", [-1, "-1"]) +def test_encoded_cuda_version_from_encoded_rejects_negative_values(encoded): + with pytest.raises( + ValueError, + match=r"EncodedCudaVersion\.from_encoded\(\) expected a non-negative encoded CUDA version, got -1", + ): + toolkit_info.EncodedCudaVersion.from_encoded(encoded) + + +def test_parse_cuda_header_version_returns_parsed_dataclass(): + header_text = """ + #ifndef CUDA_H + #define CUDA_H + #define CUDA_VERSION 13020 + #endif + """ + + assert toolkit_info.parse_cuda_header_version(header_text) == toolkit_info.CudaToolkitVersion( + encoded=13020, + major=13, + minor=2, + ) + + +def test_cuda_toolkit_version_from_encoded_returns_subclass_instance(): + version = toolkit_info.CudaToolkitVersion.from_encoded(12090) + + assert version == toolkit_info.CudaToolkitVersion( + encoded=12090, + major=12, + minor=9, + ) + assert type(version) is toolkit_info.CudaToolkitVersion + + +def 
test_parse_cuda_header_version_returns_none_when_macro_is_missing(): + header_text = """ + #ifndef CUDA_H + #define CUDA_H + #define CUDA_API_PER_THREAD_DEFAULT_STREAM 1 + #endif + """ + + assert toolkit_info.parse_cuda_header_version(header_text) is None + + +def test_read_cuda_header_version_reads_file_and_returns_parsed_dataclass(tmp_path): + cuda_h_path = tmp_path / "cuda.h" + cuda_h_path.write_text( + """ + #ifndef CUDA_H + #define CUDA_H + #define CUDA_VERSION 12090 /* CUDA 12.9 */ + #endif + """, + encoding="utf-8", + ) + + assert toolkit_info.read_cuda_header_version(str(cuda_h_path)) == toolkit_info.CudaToolkitVersion( + encoded=12090, + major=12, + minor=9, + ) + + +def test_read_cuda_header_version_tolerates_non_utf8_bytes(tmp_path): + cuda_h_path = tmp_path / "cuda.h" + cuda_h_path.write_bytes( + b"#ifndef CUDA_H\n" + b"#define CUDA_H\n" + b"\xff\xfe invalid bytes in comment or banner\n" + b"#define CUDA_VERSION 12080\n" + b"#endif\n" + ) + + assert toolkit_info.read_cuda_header_version(str(cuda_h_path)) == toolkit_info.CudaToolkitVersion( + encoded=12080, + major=12, + minor=8, + ) + + +def test_read_cuda_header_version_wraps_parse_failures(tmp_path): + cuda_h_path = tmp_path / "cuda.h" + cuda_h_path.write_text( + """ + #ifndef CUDA_H + #define CUDA_H + #endif + """, + encoding="utf-8", + ) + + with pytest.raises( + toolkit_info.ReadCudaHeaderVersionError, + match="Failed to read the CUDA Toolkit version from cuda.h", + ) as exc_info: + toolkit_info.read_cuda_header_version(str(cuda_h_path)) + + assert isinstance(exc_info.value.__cause__, RuntimeError) + assert "does not define CUDA_VERSION" in str(exc_info.value.__cause__) diff --git a/toolshed/conda_create_for_pathfinder_testing.ps1 b/toolshed/conda_create_for_pathfinder_testing.ps1 index 115720f6e5e..1c0b2999ffa 100644 --- a/toolshed/conda_create_for_pathfinder_testing.ps1 +++ b/toolshed/conda_create_for_pathfinder_testing.ps1 @@ -7,22 +7,30 @@ param( ) $ErrorActionPreference = "Stop" 
+Set-StrictMode -Version Latest + +$cudaMajor = $CudaVersion.Split(".", 2)[0] +switch ($cudaMajor) { + "12" { $pythonVersion = "3.12" } + "13" { $pythonVersion = "3.14" } + default { + throw "Unsupported CUDA major version for this helper: $cudaMajor. Expected a 12.x or 13.x toolkit version." + } +} & "$env:CONDA_EXE" "shell.powershell" "hook" | Out-String | Invoke-Expression -conda create --yes -n "pathfinder_testing_cu$CudaVersion" python=3.13 "cuda-toolkit=$CudaVersion" +conda create --yes -n "pathfinder_testing_cu$CudaVersion" "python=$pythonVersion" "cuda-toolkit=$CudaVersion" conda activate "pathfinder_testing_cu$CudaVersion" +# Keep this list aligned with the Windows-installable subset of +# cuda_pathfinder/pyproject.toml. $cpkgs = @( "cusparselt-dev", "cutensor", - "libcublasmp-dev", + "cutlass", "libcudss-dev", - "libcufftmp-dev", - "libmathdx-dev", - "libnvshmem3", - "libnvshmem-dev", - "libnvpl-fft-dev" + "libmathdx-dev" ) foreach ($cpkg in $cpkgs) { diff --git a/toolshed/conda_create_for_pathfinder_testing.sh b/toolshed/conda_create_for_pathfinder_testing.sh index 1ed57e6765b..8674bb1ed03 100755 --- a/toolshed/conda_create_for_pathfinder_testing.sh +++ b/toolshed/conda_create_for_pathfinder_testing.sh @@ -3,26 +3,63 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + if [[ $# -ne 1 ]]; then echo "Usage: $(basename "$0") ctk-major-minor-patch" 1>&2 exit 1 fi +cuda_version="$1" +cuda_major="${cuda_version%%.*}" +uname_m="$(uname -m)" +case "$cuda_major" in + 12) + python_version=3.12 + ;; + 13) + python_version=3.14 + ;; + *) + echo "Unsupported CUDA major version for this helper: $cuda_major" 1>&2 + echo "Expected a 12.x or 13.x toolkit version." 
1>&2 + exit 1 + ;; +esac + eval "$(conda shell.bash hook)" -conda create --yes -n "pathfinder_testing_cu$1" python=3.13 cuda-toolkit="$1" -conda activate "pathfinder_testing_cu$1" - -for cpkg in \ - cusparselt-dev \ - cutensor \ - libcublasmp-dev \ - libcudss-dev \ - libcufftmp-dev \ - libmathdx-dev \ - libnvshmem3 \ - libnvshmem-dev \ - libnvpl-fft-dev; do +conda create --yes -n "pathfinder_testing_cu$cuda_version" "python=$python_version" cuda-toolkit="$cuda_version" +set +u +conda activate "pathfinder_testing_cu$cuda_version" +set -u + +# Keep this list aligned with the Linux-installable subset of +# cuda_pathfinder/pyproject.toml. +cpkgs=( + "cusparselt-dev" + "cutensor" + "cutlass" + "libcublasmp-dev" + "libcudss-dev" + "libcufftmp-dev" + "libcusolvermp-dev" + "libmathdx-dev" + "libnvshmem3" + "libnvshmem-dev" +) + +# Keep the conda environment aligned with platform-scoped pyproject groups. +if [[ "$uname_m" == "aarch64" ]]; then + cpkgs+=("libnvpl-fft-dev") + if [[ "$cuda_major" == "13" ]]; then + cpkgs+=("libcudla-dev") + fi +fi + +for cpkg in "${cpkgs[@]}"; do echo "CONDA INSTALL: $cpkg" + set +u conda install -y -c conda-forge "$cpkg" + set -u done