Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f4ce3dc
Move CUDA interop behind extension target
AnastaZIuk May 6, 2026
78845ae
Address CUDA interop review cleanup
AnastaZIuk May 6, 2026
ab9a7e5
Simplify CUDA interop smoke CMake
AnastaZIuk May 6, 2026
bf8eeb3
Clean CUDA interop smoke usage requirements
AnastaZIuk May 6, 2026
f701ac6
Export CUDA interop package target
AnastaZIuk May 6, 2026
a520d57
Use CUDAToolkit package targets
AnastaZIuk May 6, 2026
4bddc57
Require CUDA version via CMake
AnastaZIuk May 6, 2026
6f68e66
Split CUDA interop native surface
AnastaZIuk May 6, 2026
49bcb2c
Add native CUDA accessor overloads
AnastaZIuk May 6, 2026
d85657e
Document CUDA interop target split
AnastaZIuk May 6, 2026
6e8c4f9
Trim CUDA interop README wording
AnastaZIuk May 6, 2026
881e9b8
Move CUDA interop into Nabla
AnastaZIuk May 6, 2026
5dd1134
Document CUDA interop accessor model
AnastaZIuk May 7, 2026
e514df7
Inline CUDA interop stubs
AnastaZIuk May 7, 2026
e53c838
Refine CUDA interop boundary
AnastaZIuk May 7, 2026
1417905
Add CUDA interop runtime header discovery
AnastaZIuk May 7, 2026
045432e
Tighten CUDA interop native helpers
AnastaZIuk May 7, 2026
8a119dd
Hide CUDA interop native state construction
AnastaZIuk May 7, 2026
e018545
Clean up CUDA runtime header discovery
AnastaZIuk May 7, 2026
c6ef6ee
Move CUDA interop API back into video
AnastaZIuk May 7, 2026
d559a2c
Move smart pointer helpers into core
AnastaZIuk May 7, 2026
38705b9
Use CUDA interop accessors
AnastaZIuk May 7, 2026
23e6ef5
Use explicit CUDA compile log
AnastaZIuk May 7, 2026
a640183
Trim CUDA interop API surface
AnastaZIuk May 7, 2026
5bf0e2d
Keep CUDA SDK layouts private
AnastaZIuk May 7, 2026
d745421
Simplify CUDA interop helper
AnastaZIuk May 7, 2026
ffba3d4
Update CUDA interop examples pointer
AnastaZIuk May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,15 @@ else()
message(STATUS "Vulkan SDK is not found")
endif()

option(NBL_COMPILE_WITH_CUDA "Compile with CUDA interop?" OFF)
option(NBL_COMPILE_WITH_CUDA "Build the CUDA interop extension?" OFF)
set(NBL_CUDA_TOOLKIT_ROOT "" CACHE PATH "Optional CUDA Toolkit root used when NBL_COMPILE_WITH_CUDA is ON")

if(NBL_COMPILE_WITH_CUDA)
find_package(CUDAToolkit REQUIRED)
if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "13.0")
message(STATUS "CUDA version ${CUDAToolkit_VERSION} found!")
else()
message(FATAL_ERROR "CUDA version 13.0+ needed for C++14 support!")
if(NBL_CUDA_TOOLKIT_ROOT)
set(CUDAToolkit_ROOT "${NBL_CUDA_TOOLKIT_ROOT}")
endif()
find_package(CUDAToolkit 13.0 REQUIRED)
message(STATUS "CUDA version ${CUDAToolkit_VERSION} found!")
endif()

get_filename_component(NBL_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
Expand Down Expand Up @@ -183,13 +183,12 @@ option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON)
option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON)

option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" OFF)
if(NBL_COMPILE_WITH_CUDA)
find_package(OPTIX REQUIRED)
message(STATUS "CUDA enabled and OptiX found!")
else()
if(NBL_BUILD_OPTIX)
if(NBL_BUILD_OPTIX)
if(NOT NBL_COMPILE_WITH_CUDA)
message(FATAL_ERROR "You cannot build Optix without enabled CUDA! NBL_COMPILE_WITH_CUDA must be ON!")
endif()
find_package(OPTIX REQUIRED)
message(STATUS "CUDA enabled and OptiX found!")
endif()

option(NBL_BUILD_BULLET "Enable Bullet Physics building and integration?" OFF)
Expand Down Expand Up @@ -313,6 +312,7 @@ if(NBL_ENABLE_CONFIG_INSTALL)
set(_NBL_NABLA_CONFIG_FILES
"${CMAKE_CURRENT_BINARY_DIR}/NablaConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/NablaConfigVersion.cmake"
"${CMAKE_CURRENT_LIST_DIR}/cmake/NablaCUDAInteropHelpers.cmake"
)

install(EXPORT NablaExportTargets
Expand Down
4 changes: 3 additions & 1 deletion cmake/FindZLIB.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ endif()

set(ZLIB_FOUND TRUE)
set(ZLIB_LIBRARY ZLIB::ZLIB)
set(ZLIB_INCLUDE_DIR "${THIRD_PARTY_SOURCE_DIR}/zlib;${THIRD_PARTY_BINARY_DIR}/zlib")
set(ZLIB_LIBRARIES ZLIB::ZLIB)
set(ZLIB_INCLUDE_DIR "${THIRD_PARTY_SOURCE_DIR}/zlib;${THIRD_PARTY_BINARY_DIR}/zlib")
set(ZLIB_INCLUDE_DIRS "${ZLIB_INCLUDE_DIR}")
28 changes: 28 additions & 0 deletions cmake/NablaCUDAInteropHelpers.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# nbl_target_link_cuda_interop(<target> <scope-or-first-option>
#                              [RUNTIME_JSON <path>]
#                              [INCLUDE_DIRS <dir>...])
#
# Links <target> against Nabla::ext::CUDAInterop and generates a small JSON
# file listing the CUDA runtime include directories.
#
# Arguments:
#   TARGET_NAME   Target to link and to use as the file(GENERATE) evaluation target.
#   SCOPE         PRIVATE, PUBLIC or INTERFACE. Any other value is treated as the
#                 first keyword/value token and the link scope falls back to PRIVATE.
#   RUNTIME_JSON  Optional output path for the JSON file. Defaults to
#                 nbl_cuda_interop_runtime.json next to the target's binary.
#   INCLUDE_DIRS  Optional extra include directories, written before the entries
#                 of CUDAToolkit_INCLUDE_DIRS.
function(nbl_target_link_cuda_interop TARGET_NAME SCOPE)
    # ARGN holds everything after the SCOPE positional argument.
    set(_option_args ${ARGN})
    if(NOT SCOPE MATCHES "^(PRIVATE|PUBLIC|INTERFACE)$")
        # The second argument is not a scope keyword, so it is really the first
        # option token (for example INCLUDE_DIRS). Put it back in front of the
        # remaining arguments so the keyword is parsed instead of being dropped.
        set(_option_args "${SCOPE}" ${ARGN})
        set(SCOPE PRIVATE)
    endif()

    cmake_parse_arguments(_NBL_CUDA_INTEROP "" "RUNTIME_JSON" "INCLUDE_DIRS" ${_option_args})
    if(_NBL_CUDA_INTEROP_UNPARSED_ARGUMENTS)
        message(WARNING "nbl_target_link_cuda_interop: ignoring unrecognized arguments: ${_NBL_CUDA_INTEROP_UNPARSED_ARGUMENTS}")
    endif()

    target_link_libraries("${TARGET_NAME}" ${SCOPE} Nabla::ext::CUDAInterop)

    # Build one quoted JSON string per non-empty include directory, using
    # forward slashes so the paths need no JSON backslash escaping.
    set(_include_dir_entries "")
    foreach(_include_dir IN LISTS _NBL_CUDA_INTEROP_INCLUDE_DIRS CUDAToolkit_INCLUDE_DIRS)
        if(_include_dir)
            file(TO_CMAKE_PATH "${_include_dir}" _include_dir)
            list(APPEND _include_dir_entries " \"${_include_dir}\"")
        endif()
    endforeach()
    list(JOIN _include_dir_entries "," _include_dirs_json)

    # Bracket argument keeps the template literal; only the @...@ placeholder is substituted.
    set(_runtime_json [=[
{
"cudaRuntimeIncludeDirs": [
@_include_dirs_json@
]
}
]=])
    string(CONFIGURE "${_runtime_json}" _runtime_json @ONLY)

    # An explicit RUNTIME_JSON path overrides the default location beside the target binary.
    set(_runtime_json_path "$<TARGET_FILE_DIR:${TARGET_NAME}>/nbl_cuda_interop_runtime.json")
    if(_NBL_CUDA_INTEROP_RUNTIME_JSON)
        set(_runtime_json_path "${_NBL_CUDA_INTEROP_RUNTIME_JSON}")
    endif()
    file(GENERATE OUTPUT "${_runtime_json_path}" CONTENT "${_runtime_json}" TARGET "${TARGET_NAME}")
endfunction()
22 changes: 22 additions & 0 deletions cmake/NablaConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set(Nabla_DXC_GIT_INFO_JSON_FILE "${PACKAGE_PREFIX_DIR}/include/dxc_git_info.jso

set(_NBL_NABLA_LOAD_CORE OFF)
set(_NBL_NABLA_LOAD_NSC OFF)
set(_NBL_NABLA_LOAD_CUDA_INTEROP OFF)
set(_NBL_NABLA_COMPONENTS ${Nabla_FIND_COMPONENTS})
set(_NBL_NABLA_HAS_CORE_EXPORTS OFF)
set(_NBL_NABLA_HAS_NSC_EXPORTS OFF)
Expand All @@ -25,6 +26,10 @@ if(_NBL_NABLA_COMPONENTS)
elseif(_NBL_NABLA_COMPONENT STREQUAL "Core")
set(_NBL_NABLA_LOAD_CORE ON)
set(Nabla_Core_FOUND TRUE)
elseif(_NBL_NABLA_COMPONENT STREQUAL "CUDAInterop")
set(_NBL_NABLA_LOAD_CORE ON)
set(_NBL_NABLA_LOAD_CUDA_INTEROP ON)
set(Nabla_CUDAInterop_FOUND TRUE)
else()
set("Nabla_${_NBL_NABLA_COMPONENT}_FOUND" FALSE)
endif()
Expand Down Expand Up @@ -80,6 +85,23 @@ if(_NBL_NABLA_LOAD_NSC)
endif()
endif()

# Optional CUDAInterop component: locate the CUDA Toolkit, load the exported
# Nabla::ext::CUDAInterop target and make the consumer-side helper available.
if(_NBL_NABLA_LOAD_CUDA_INTEROP)
    include(CMakeFindDependencyMacro)

    # Consumers may point CUDAToolkit discovery at a specific install
    # through Nabla_CUDA_TOOLKIT_ROOT.
    if(DEFINED Nabla_CUDA_TOOLKIT_ROOT AND NOT "${Nabla_CUDA_TOOLKIT_ROOT}" STREQUAL "")
        set(CUDAToolkit_ROOT "${Nabla_CUDA_TOOLKIT_ROOT}")
    endif()

    # find_dependency() forwards QUIET/REQUIRED from the enclosing
    # find_package(Nabla ...) call, so REQUIRED is not hard-coded here: an
    # optional lookup of Nabla then reports "not found" instead of aborting
    # the configure step when the CUDA Toolkit is missing.
    find_dependency(CUDAToolkit 13.0)

    _nbl_try_include_component("CUDAInterop" "NablaCUDAInteropExportTargets.cmake" _NBL_NABLA_CUDA_INTEROP_FOUND)
    if(_NBL_NABLA_CUDA_INTEROP_FOUND AND TARGET Nabla::ext::CUDAInterop)
        # Attach the toolkit usage requirements to the imported target on the consumer side.
        target_link_libraries(Nabla::ext::CUDAInterop INTERFACE CUDA::toolkit)
        # The helper script is installed next to this config file; include it only when present.
        if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/NablaCUDAInteropHelpers.cmake")
            include("${CMAKE_CURRENT_LIST_DIR}/NablaCUDAInteropHelpers.cmake")
        endif()
    endif()
endif()

check_required_components(Nabla)

#
Expand Down
214 changes: 214 additions & 0 deletions include/nbl/ext/CUDAInterop/CUDAInteropNative.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _NBL_EXT_CUDA_INTEROP_NATIVE_H_INCLUDED_
#define _NBL_EXT_CUDA_INTEROP_NATIVE_H_INCLUDED_

#include "nbl/video/CUDAInterop.h"

#include "nbl/asset/ICPUBuffer.h"
#include "nbl/system/DynamicFunctionCaller.h"

#include <string>

#include "cuda.h"
#include "nvrtc.h"
#if CUDA_VERSION < 13000
#error "Need CUDA 13.0 SDK or higher."
#endif

namespace nbl::video::cuda_native
{

inline constexpr int MinimumCUDADriverVersion = 13000;
inline constexpr int MinimumNVRTCMajorVersion = MinimumCUDADriverVersion/1000;

using LibLoader = system::DefaultFuncPtrLoader;

// Declares the `CUDA` type (returned by CCUDAHandlerAccessor::getCUDAFunctionTable) through
// NBL_SYSTEM_DECLARE_DYNAMIC_FUNCTION_CALLER_CLASS, with LibLoader as the loader type.
// Every argument after LibLoader names a CUDA driver API symbol.
// NOTE(review): presumably each symbol is resolved by name at runtime through the loader
// instead of being linked directly; confirm against nbl/system/DynamicFunctionCaller.h.
NBL_SYSTEM_DECLARE_DYNAMIC_FUNCTION_CALLER_CLASS(CUDA,LibLoader
,cuCtxCreate_v4
,cuDevicePrimaryCtxRetain
,cuDevicePrimaryCtxRelease
,cuDevicePrimaryCtxSetFlags
,cuDevicePrimaryCtxGetState
,cuCtxDestroy_v2
,cuCtxEnablePeerAccess
,cuCtxGetApiVersion
,cuCtxGetCurrent
,cuCtxGetDevice
,cuCtxGetSharedMemConfig
,cuCtxPopCurrent_v2
,cuCtxPushCurrent_v2
,cuCtxSetCacheConfig
,cuCtxSetCurrent
,cuCtxSetSharedMemConfig
,cuCtxSynchronize
,cuDeviceComputeCapability
,cuDeviceCanAccessPeer
,cuDeviceGetCount
,cuDeviceGet
,cuDeviceGetAttribute
,cuDeviceGetLuid
,cuDeviceGetUuid_v2
,cuDeviceTotalMem_v2
,cuDeviceGetName
,cuDriverGetVersion
,cuEventCreate
,cuEventDestroy_v2
,cuEventElapsedTime
,cuEventQuery
,cuEventRecord
,cuEventSynchronize
,cuFuncGetAttribute
,cuFuncSetCacheConfig
,cuGetErrorName
,cuGetErrorString
,cuGraphicsMapResources
,cuGraphicsResourceGetMappedPointer_v2
,cuGraphicsResourceGetMappedMipmappedArray
,cuGraphicsSubResourceGetMappedArray
,cuGraphicsUnmapResources
,cuGraphicsUnregisterResource
,cuInit
,cuLaunchKernel
,cuMemAlloc_v2
,cuMemcpyDtoD_v2
,cuMemcpyDtoH_v2
,cuMemcpyHtoD_v2
,cuMemcpyDtoDAsync_v2
,cuMemcpyDtoHAsync_v2
,cuMemcpyHtoDAsync_v2
,cuMemGetAddressRange_v2
,cuMemFree_v2
,cuMemFreeHost
,cuMemGetInfo_v2
,cuMemHostAlloc
,cuMemHostRegister_v2
,cuMemHostUnregister
,cuMemsetD32_v2
,cuMemsetD32Async
,cuMemsetD8_v2
,cuMemsetD8Async
,cuModuleGetFunction
,cuModuleGetGlobal_v2
,cuModuleLoadDataEx
,cuModuleLoadFatBinary
,cuModuleUnload
,cuOccupancyMaxActiveBlocksPerMultiprocessor
,cuPointerGetAttribute
,cuStreamAddCallback
,cuStreamCreate
,cuStreamDestroy_v2
,cuStreamQuery
,cuStreamSynchronize
,cuStreamWaitEvent
,cuSurfObjectCreate
,cuSurfObjectDestroy
,cuTexObjectCreate
,cuTexObjectDestroy
,cuImportExternalMemory
,cuDestroyExternalMemory
,cuExternalMemoryGetMappedBuffer
,cuMemUnmap
,cuMemAddressFree
,cuMemGetAllocationGranularity
,cuMemAddressReserve
,cuMemCreate
,cuMemExportToShareableHandle
,cuMemMap
,cuMemRelease
,cuMemSetAccess
,cuMemImportFromShareableHandle
,cuLaunchHostFunc
,cuDestroyExternalSemaphore
,cuImportExternalSemaphore
,cuSignalExternalSemaphoresAsync
,cuWaitExternalSemaphoresAsync
,cuLogsRegisterCallback
);

// Declares the `NVRTC` type (returned by CCUDAHandlerAccessor::getNVRTCFunctionTable) through
// NBL_SYSTEM_DECLARE_DYNAMIC_FUNCTION_CALLER_CLASS, with LibLoader as the loader type.
// Every argument after LibLoader names an NVRTC API symbol.
// NOTE(review): presumably resolved by name at runtime through the loader; confirm against
// nbl/system/DynamicFunctionCaller.h.
NBL_SYSTEM_DECLARE_DYNAMIC_FUNCTION_CALLER_CLASS(NVRTC,LibLoader,
nvrtcGetErrorString,
nvrtcVersion,
nvrtcAddNameExpression,
nvrtcCompileProgram,
nvrtcCreateProgram,
nvrtcDestroyProgram,
nvrtcGetLoweredName,
nvrtcGetPTX,
nvrtcGetPTXSize,
nvrtcGetProgramLog,
nvrtcGetProgramLogSize
);

// Per-device record exposed by CCUDAHandlerAccessor::getAvailableDevices.
// Both members are value-initialized by default.
struct SCUDADeviceInfo
{
// CUDA driver device handle.
CUdevice handle = {};
// CUDA UUID value; presumably the UUID of the same device as `handle` (confirm in the implementation).
CUuuid uuid = {};
};

// Parameter bundle consumed by CCUDADeviceAccessor::createExportableMemory.
// NOTE(review): unlike SCUDADeviceInfo, the members have no default initializers, so a
// default-initialized instance leaves them indeterminate; initialize every field explicitly.
struct SExportableMemoryCreationParams
{
// Requested allocation size; presumably in bytes (TODO confirm).
size_t size;
// Requested alignment; units are not visible in this header (TODO confirm).
uint32_t alignment;
// CUDA memory location type for the allocation.
CUmemLocationType location;
};

// Return type of CCUDAHandlerAccessor::getPTX and compileDirectlyToPTX:
// a CPU buffer for the PTX together with the NVRTC status code.
struct SPTXResult
{
// Buffer holding the PTX; presumably null when `result` reports a failure (TODO confirm).
core::smart_refctd_ptr<asset::ICPUBuffer> ptx;
// NVRTC status code; has no default initializer.
nvrtcResult result;
};

// Opt-in native CUDA API. The declarations below are implemented by the Nabla library.
// This header is intentionally the only public path that includes CUDA SDK types.
// Static-only accessor exposing the CUDA-SDK-typed side of CCUDAHandler.
// Every member takes the handler (or a logger) explicitly; the class declares no data members.
class NBL_API2 CCUDAHandlerAccessor
{
public:
// Returns the handler's CUDA driver function table.
static const CUDA& getCUDAFunctionTable(const CCUDAHandler& handler);
// Returns the handler's NVRTC function table.
static const NVRTC& getNVRTCFunctionTable(const CCUDAHandler& handler);
// Default handling of a CUresult given an explicit logger.
// NOTE(review): the meaning of the returned bool is not visible in this header; confirm in the implementation.
static bool defaultHandleResult(CUresult result, const system::logger_opt_ptr& logger);
// Default handling of a CUresult in the context of a handler.
static bool defaultHandleResult(const CCUDAHandler& handler, CUresult result);
// Default handling of an nvrtcResult in the context of a handler.
static bool defaultHandleResult(const CCUDAHandler& handler, nvrtcResult result);
// Returns the handler's list of available CUDA devices.
static const core::vector<SCUDADeviceInfo>& getAvailableDevices(const CCUDAHandler& handler);
// Creates an NVRTC program from `source`, written to `*prog`.
// `source` is taken by rvalue reference; the header arguments are optional (headerCount defaults to 0).
static nvrtcResult createProgram(CCUDAHandler& handler, nvrtcProgram* prog, std::string&& source, const char* name, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr);
// Compiles `prog` with the given range of option strings.
static nvrtcResult compileProgram(const CCUDAHandler& handler, nvrtcProgram prog, core::SRange<const char* const> options);
// Retrieves the program log for `prog` into `log`.
static nvrtcResult getProgramLog(const CCUDAHandler& handler, nvrtcProgram prog, std::string& log);
// Retrieves the PTX for `prog` together with the NVRTC status.
static SPTXResult getPTX(const CCUDAHandler& handler, nvrtcProgram prog);
// One-call path from source text to PTX; `log` is an output parameter.
// NOTE(review): presumably chains createProgram, compileProgram, getProgramLog and getPTX; confirm in the implementation.
static SPTXResult compileDirectlyToPTX(
CCUDAHandler& handler, std::string&& source, const char* filename, core::SRange<const char* const> nvrtcOptions,
std::string& log, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr
);
};

// Static-only accessor exposing the CUDA-SDK-typed side of CCUDADevice.
class NBL_API2 CCUDADeviceAccessor
{
public:
// Returns the CUdevice handle associated with `device`.
static CUdevice getInternalObject(const CCUDADevice& device);
// Returns the CUcontext associated with `device`.
static CUcontext getContext(const CCUDADevice& device);
// Rounds `size` to the granularity that applies to `location` on `device`.
// NOTE(review): presumably rounds up to the allocation granularity; confirm in the implementation.
static size_t roundToGranularity(const CCUDADevice& device, CUmemLocationType location, size_t size);
// Creates exportable memory on `device` from `params` (taken by rvalue reference).
// NOTE(review): failure signalling (e.g. a null pointer) is not visible in this header.
static core::smart_refctd_ptr<CCUDAExportableMemory> createExportableMemory(CCUDADevice& device, SExportableMemoryCreationParams&& params);
};

// Static-only accessor exposing the CUDA-SDK-typed side of CCUDAExportableMemory.
class NBL_API2 CCUDAExportableMemoryAccessor
{
public:
// Returns the CUdeviceptr associated with `memory`.
static CUdeviceptr getDeviceptr(const CCUDAExportableMemory& memory);
};

// Static-only accessor exposing the CUDA-SDK-typed side of CCUDAImportedMemory.
class NBL_API2 CCUDAImportedMemoryAccessor
{
public:
// Returns the CUexternalMemory handle associated with `memory`.
static CUexternalMemory getInternalObject(const CCUDAImportedMemory& memory);
// Writes a device pointer for the imported memory to `*mappedBuffer` and returns the CUDA status.
// NOTE(review): mapping offset/size and who unmaps or frees the pointer are not visible here.
static CUresult getMappedBuffer(const CCUDAImportedMemory& memory, CUdeviceptr* mappedBuffer);
};

// Static-only accessor exposing the CUDA-SDK-typed side of CCUDAImportedSemaphore.
class NBL_API2 CCUDAImportedSemaphoreAccessor
{
public:
// Returns the CUexternalSemaphore handle associated with `semaphore`.
static CUexternalSemaphore getInternalObject(const CCUDAImportedSemaphore& semaphore);
};
Comment on lines +163 to +210
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

accessors make no sense just move all the nbl/video/CCUDA*.h to the extension


}

#endif
4 changes: 2 additions & 2 deletions include/nbl/ext/OptiX/IDenoiser.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#ifndef __NBL_EXT_OPTIX_DENOISER_H_INCLUDED__
#define __NBL_EXT_OPTIX_DENOISER_H_INCLUDED__

#include "../../../../src/nbl/video/CCUDAHandler.h"
#include "nbl/video/CCUDAHandler.h"

#include <optix.h>
#include <optix_denoiser_tiling.h>
Expand Down Expand Up @@ -122,4 +122,4 @@ class IDenoiser final : public core::IReferenceCounted
}
}

#endif
#endif
8 changes: 4 additions & 4 deletions include/nbl/system/DefaultFuncPtrLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@
namespace nbl::system
{

class DefaultFuncPtrLoader final : FuncPtrLoader
class NBL_API2 DefaultFuncPtrLoader final : FuncPtrLoader
{
void* lib;

public:
inline DefaultFuncPtrLoader() : lib(nullptr) {}
NBL_API2 DefaultFuncPtrLoader(const char* name);
DefaultFuncPtrLoader(const char* name);
inline DefaultFuncPtrLoader(DefaultFuncPtrLoader&& other) : DefaultFuncPtrLoader()
{
operator=(std::move(other));
}
NBL_API2 ~DefaultFuncPtrLoader();
~DefaultFuncPtrLoader();

inline DefaultFuncPtrLoader& operator=(DefaultFuncPtrLoader&& other)
{
Expand All @@ -40,4 +40,4 @@ class DefaultFuncPtrLoader final : FuncPtrLoader

}

#endif
#endif
Loading
Loading