Move CUDA interop behind native opt-in#1067
Open
AnastaZIuk wants to merge 27 commits intovk_cuda_interopfrom
Open
Move CUDA interop behind native opt-in#1067AnastaZIuk wants to merge 27 commits intovk_cuda_interopfrom
AnastaZIuk wants to merge 27 commits intovk_cuda_interopfrom
Conversation
Comment on lines
+163
to
+210
| // Opt-in native CUDA API. The declarations below are implemented by the Nabla library. | ||
| // This header is intentionally the only public path that includes CUDA SDK types. | ||
| class NBL_API2 CCUDAHandlerAccessor | ||
| { | ||
| public: | ||
| static const CUDA& getCUDAFunctionTable(const CCUDAHandler& handler); | ||
| static const NVRTC& getNVRTCFunctionTable(const CCUDAHandler& handler); | ||
| static bool defaultHandleResult(CUresult result, const system::logger_opt_ptr& logger); | ||
| static bool defaultHandleResult(const CCUDAHandler& handler, CUresult result); | ||
| static bool defaultHandleResult(const CCUDAHandler& handler, nvrtcResult result); | ||
| static const core::vector<SCUDADeviceInfo>& getAvailableDevices(const CCUDAHandler& handler); | ||
| static nvrtcResult createProgram(CCUDAHandler& handler, nvrtcProgram* prog, std::string&& source, const char* name, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr); | ||
| static nvrtcResult compileProgram(const CCUDAHandler& handler, nvrtcProgram prog, core::SRange<const char* const> options); | ||
| static nvrtcResult getProgramLog(const CCUDAHandler& handler, nvrtcProgram prog, std::string& log); | ||
| static SPTXResult getPTX(const CCUDAHandler& handler, nvrtcProgram prog); | ||
| static SPTXResult compileDirectlyToPTX( | ||
| CCUDAHandler& handler, std::string&& source, const char* filename, core::SRange<const char* const> nvrtcOptions, | ||
| std::string& log, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr | ||
| ); | ||
| }; | ||
|
|
||
| class NBL_API2 CCUDADeviceAccessor | ||
| { | ||
| public: | ||
| static CUdevice getInternalObject(const CCUDADevice& device); | ||
| static CUcontext getContext(const CCUDADevice& device); | ||
| static size_t roundToGranularity(const CCUDADevice& device, CUmemLocationType location, size_t size); | ||
| static core::smart_refctd_ptr<CCUDAExportableMemory> createExportableMemory(CCUDADevice& device, SExportableMemoryCreationParams&& params); | ||
| }; | ||
|
|
||
| class NBL_API2 CCUDAExportableMemoryAccessor | ||
| { | ||
| public: | ||
| static CUdeviceptr getDeviceptr(const CCUDAExportableMemory& memory); | ||
| }; | ||
|
|
||
| class NBL_API2 CCUDAImportedMemoryAccessor | ||
| { | ||
| public: | ||
| static CUexternalMemory getInternalObject(const CCUDAImportedMemory& memory); | ||
| static CUresult getMappedBuffer(const CCUDAImportedMemory& memory, CUdeviceptr* mappedBuffer); | ||
| }; | ||
|
|
||
| class NBL_API2 CCUDAImportedSemaphoreAccessor | ||
| { | ||
| public: | ||
| static CUexternalSemaphore getInternalObject(const CCUDAImportedSemaphore& semaphore); | ||
| }; |
There was a problem hiding this comment.
accessors make no sense just move all the nbl/video/CCUDA*.h to the extension
Comment on lines
-298
to
-305
| #define ASSERT_CUDA_SUCCESS(expr, handler) \ | ||
| do { \ | ||
| const auto cudaResult = (expr); \ | ||
| if (!((handler)->defaultHandleResult(cudaResult))) { \ | ||
| assert(false); \ | ||
| } \ | ||
| } while(0) | ||
|
|
There was a problem hiding this comment.
that macro was useful just needs a rename
Comment on lines
-31
to
-52
| inline bool CloseExternalHandle(external_handle_t handle) | ||
| { | ||
| #ifdef _WIN32 | ||
| return CloseHandle(handle); | ||
| #else | ||
| return (close(handle) == 0); | ||
| #endif | ||
| } | ||
|
|
||
| inline external_handle_t DuplicateExternalHandle(external_handle_t handle) | ||
| { | ||
| #ifdef _WIN32 | ||
| HANDLE re = ExternalHandleNull; | ||
|
|
||
| const HANDLE cur = GetCurrentProcess(); | ||
| if (!DuplicateHandle(cur, handle, cur, &re, GENERIC_ALL, 0, DUPLICATE_SAME_ACCESS)) | ||
| return ExternalHandleNull; | ||
|
|
||
| return re; | ||
| #else | ||
| return dup(handle); | ||
| #endif |
There was a problem hiding this comment.
you may want to keep that inline, these are OS calls, and when they're inline they'll work BEFORE Nabla.dll is delay loaded, which is useful
Comment on lines
+1
to
+29
| #include "nbl/video/CUDAInterop.h" | ||
| #include "nbl/system/IApplicationFramework.h" | ||
|
|
||
| #include <type_traits> | ||
|
|
||
| #ifdef _NBL_COMPILE_WITH_CUDA_ | ||
| #error "Nabla::Nabla must not propagate the CUDA build define." | ||
| #endif | ||
|
|
||
| #ifdef CUDA_VERSION | ||
| #error "Nabla::Nabla must not require CUDA SDK headers." | ||
| #endif | ||
|
|
||
| namespace | ||
| { | ||
|
|
||
| class CUDAInteropCleanOptInSmoke final : public nbl::system::IApplicationFramework | ||
| { | ||
| using base_t = nbl::system::IApplicationFramework; | ||
|
|
||
| public: | ||
| using base_t::base_t; | ||
|
|
||
| bool onAppInitialized(nbl::core::smart_refctd_ptr<nbl::system::ISystem>&&) override | ||
| { | ||
| static_assert(std::is_class_v<nbl::video::CCUDADevice>); | ||
| static_assert(std::is_class_v<nbl::video::CCUDAExportableMemory>); | ||
| static_assert(std::is_class_v<nbl::video::CCUDAImportedMemory>); | ||
| static_assert(std::is_class_v<nbl::video::CCUDAImportedSemaphore>); |
There was a problem hiding this comment.
it would make more sense to not have anything CUDA related in Nabla itself
| const auto& granularity = SAccess::native(device).allocationGranularity[location]; | ||
| return ((size - 1) / granularity + 1) * granularity; | ||
| } | ||
|
|
There was a problem hiding this comment.
I mentioned in the original PR, this should be inline
Comment on lines
+104
to
+119
| if (!cuda_native::CCUDAHandlerAccessor::defaultHandleResult(*handler, cu.pcuMemAddressFree(ptr, size))) | ||
| assert(false); | ||
| return err; | ||
| } | ||
|
|
||
| CUmemAccessDesc accessDesc = { | ||
| .location = { .type = location, .id = m_handle }, | ||
| .location = { .type = location, .id = native.handle }, | ||
| .flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE, | ||
| }; | ||
|
|
||
| if (auto err = cu.pcuMemSetAccess(ptr, size, &accessDesc, 1); CUDA_SUCCESS != err) | ||
| { | ||
| ASSERT_CUDA_SUCCESS(cu.pcuMemUnmap(ptr, size), m_handler); | ||
| ASSERT_CUDA_SUCCESS(cu.pcuMemAddressFree(ptr, size), m_handler); | ||
| if (!cuda_native::CCUDAHandlerAccessor::defaultHandleResult(*handler, cu.pcuMemUnmap(ptr, size))) | ||
| assert(false); | ||
| if (!cuda_native::CCUDAHandlerAccessor::defaultHandleResult(*handler, cu.pcuMemAddressFree(ptr, size))) | ||
| assert(false); |
There was a problem hiding this comment.
@kevyuu thinking of it we shouldn't crash an entire program vecause of failure here :s
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Moves CUDA interop behind SDK-free Nabla headers with explicit
Nabla::ext::CUDAInteropnative opt-in. Keeps raw CUDA/NVRTC access available for consumers that ask for native opt-in while avoiding default public SDK requirements.