diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp index 9eaadda86f8..3a5d6ab7958 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.cpp +++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp @@ -98,6 +98,12 @@ class XnnpackBackend final weights_cache_mutex_, std::defer_lock); if (use_weight_cache) { lock_weights_cache.lock(); + + const auto& cache_path = options_.get_packed_cache_path(); + if (!cache_path.empty()) { + weights_cache_->set_packed_cache_path(cache_path); + } + weights_cache_->initialize_for_runtime( context.get_runtime_allocator(), named_data_map); workspace->set_uses_weight_cache(); diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h index eb40047f3f8..7d24ac84627 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.h +++ b/backends/xnnpack/runtime/XNNPACKBackend.h @@ -13,6 +13,10 @@ const char workspace_sharing_mode_option_key[] = "workspace_sharing_mode"; // across delegate instances. Changes only affect subsequently loaded models. const char weight_cache_option_key[] = "weight_cache_enabled"; +/// Path for the packed weight file. When non-empty, reserve_space() mmaps this +/// file MAP_SHARED instead of using heap; finalize schedules an async msync. +const char packed_cache_path_option_key[] = "packed_cache_path"; + /// Workspace sharing mode. This is a backend option that can be set via the /// set_option API to control memory sharing between CALL_DELEGATE instances. /// This is useful for reducing memory consumption. 
diff --git a/backends/xnnpack/runtime/XNNWeightsCache.cpp b/backends/xnnpack/runtime/XNNWeightsCache.cpp index 7767c65285a..eacbba6bf45 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.cpp +++ b/backends/xnnpack/runtime/XNNWeightsCache.cpp @@ -9,7 +9,13 @@ #include #include #include +#ifndef _WIN32 +#include +#include #include +#include +#include +#endif #include #include #include @@ -27,18 +33,33 @@ using executorch::runtime::MemoryAllocator; XNNWeightsCache::XNNWeightsCache() { weights_cache_.context = this; - weights_cache_.look_up = (size_t(*)( + weights_cache_.look_up = (size_t (*)( void*, const xnn_weights_cache_look_up_key*))XNNWeightsCache::look_up; weights_cache_.reserve_space = (void* (*)(void*, size_t))XNNWeightsCache::reserve_space; weights_cache_.look_up_or_insert = - (size_t(*)(void*, const xnn_weights_cache_look_up_key*, void*, size_t)) + (size_t (*)(void*, const xnn_weights_cache_look_up_key*, void*, size_t)) XNNWeightsCache::look_up_or_insert; weights_cache_.is_finalized = (bool (*)(void*))XNNWeightsCache::is_finalized; weights_cache_.offset_to_addr = (void* (*)(void*, size_t))XNNWeightsCache::offset_to_addr; weights_cache_.delete_cache = - (enum xnn_status(*)(void*))XNNWeightsCache::delete_cache; + (enum xnn_status (*)(void*))XNNWeightsCache::delete_cache; +} + +XNNWeightsCache::~XNNWeightsCache() { +#ifndef _WIN32 + for (auto& region : mmap_regions_) { + if (region.addr != nullptr && region.addr != MAP_FAILED) { + munmap(region.addr, region.size); + } + } + mmap_regions_.clear(); + if (packed_file_fd_ >= 0) { + close(packed_file_fd_); + packed_file_fd_ = -1; + } +#endif } Error XNNWeightsCache::initialize_for_runtime( @@ -48,6 +69,27 @@ Error XNNWeightsCache::initialize_for_runtime( named_data_map_ = named_data_map; is_finalized_ = false; +#ifndef _WIN32 + // Open the file for packed weights. Each reserve_space() call + // independently mmaps a region of the file. Do not reopen while earlier + // regions are still mapped: O_TRUNC would cut the file out from under them.
+ if (!packed_cache_path_.empty() && packed_file_fd_ < 0 && + mmap_regions_.empty()) { + packed_file_fd_ = + open(packed_cache_path_.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0644); + if (packed_file_fd_ < 0) { + ET_LOG( + Error, + "Failed to open packed weight file: %s (errno=%d)", + packed_cache_path_.c_str(), + errno); + } else { + packed_file_used_ = 0; + ET_LOG(Info, "Opened packed weight file: %s", packed_cache_path_.c_str()); + } + } +#endif + return Error::Ok; } @@ -73,6 +115,26 @@ Result> XNNWeightsCache::finalize_for_runtime() { } } +#ifndef _WIN32 + // Schedule async flush for newly added regions only. + // MS_ASYNC returns immediately; OS flushes in the background. + if (mmap_regions_.size() > mmap_regions_synced_) { + size_t new_count = mmap_regions_.size() - mmap_regions_synced_; + for (size_t i = mmap_regions_synced_; i < mmap_regions_.size(); ++i) { + if (msync(mmap_regions_[i].addr, mmap_regions_[i].size, MS_ASYNC) != 0) { + ET_LOG(Error, "msync of packed weights failed (errno=%d)", errno); + } + } + mmap_regions_synced_ = mmap_regions_.size(); + ET_LOG( + Info, + "Scheduled async flush: %zu new regions (%zu total), %zu MB packed weights", + new_count, + mmap_regions_.size(), + packed_file_used_ / (1024 * 1024)); + } +#endif + return packed_data_names; } @@ -111,12 +173,11 @@ Error XNNWeightsCache::delete_packed_data( entry->second.ref_count--; if (entry->second.ref_count == 0) { void* packed_data_ptr = packed_data_ptrs_[entry->second.offset]; - // Erase the key/value from the map frees the pointer holding the packed - // data - packed_pointer_to_container_.erase(packed_data_ptr); - // remove the pointer from the packed_data_ptrs_ + // Only free heap-allocated packed data, not file-backed mmap'd data + if (packed_pointer_to_container_.count(packed_data_ptr) > 0) { + packed_pointer_to_container_.erase(packed_data_ptr); + } packed_data_ptrs_[entry->second.offset] = nullptr; - // Erase the name to packed metadata entry name_to_packed_data_metadata_.erase(entry->first); } } @@ -158,17 +219,47 @@ size_t XNNWeightsCache::look_up( return 
packed_weight_entry->second.offset; } -/** - * Reserve space in the weight cache for n bytes of weight data, aligned to - * context->kPackedAllocationAlignment. This function will return nullptr if - * the allocation fails. - */ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { - // MemoryAllocator* allocator = context->runtime_allocator_; - // void* reserved_pointer = allocator->allocate(n, - // context->kPackedAllocationAlignment); +#ifndef _WIN32 + if (context->packed_file_fd_ >= 0) { + size_t page_size = sysconf(_SC_PAGESIZE); + size_t file_offset = + (context->packed_file_used_ + page_size - 1) & ~(page_size - 1); + size_t map_size = (n + page_size - 1) & ~(page_size - 1); + + if (ftruncate(context->packed_file_fd_, file_offset + map_size) != 0) { + ET_LOG( + Error, + "ftruncate to %zu failed (errno=%d)", + file_offset + map_size, + errno); + close(context->packed_file_fd_); + context->packed_file_fd_ = -1; + goto heap_fallback; + } + + void* ptr = mmap( + nullptr, + map_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + context->packed_file_fd_, + file_offset); + if (ptr == MAP_FAILED) { + ET_LOG(Error, "mmap %zu bytes failed (errno=%d)", map_size, errno); + close(context->packed_file_fd_); + context->packed_file_fd_ = -1; + goto heap_fallback; + } + + context->packed_file_used_ = file_offset + map_size; + context->mmap_regions_.push_back({ptr, map_size}); + return ptr; + } + +heap_fallback: +#endif - // return reserved_pointer; try { std::string data_container; size_t raw_allocation_size = n + context->kPackedAllocationAlignment - 1; @@ -187,9 +278,6 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { std::move(data_container); return aligned_space; } catch (std::bad_alloc& e) { - // XNNPACK can gracefully handle allocation failures, so return nullptr. - // We want to be able to recover from a failed attempt to load a large - // model without a crash. 
ET_LOG( Error, "XNN weight cache failed to allocate %zu bytes: %s.", @@ -267,6 +355,10 @@ enum xnn_status XNNWeightsCache::delete_cache(XNNWeightsCache* context) { return xnn_status_success; } +void XNNWeightsCache::set_packed_cache_path(const std::string& path) { + packed_cache_path_ = path; +} + } // namespace delegate } // namespace xnnpack } // namespace backends diff --git a/backends/xnnpack/runtime/XNNWeightsCache.h b/backends/xnnpack/runtime/XNNWeightsCache.h index f8371f93d01..0cc4c3d9a68 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.h +++ b/backends/xnnpack/runtime/XNNWeightsCache.h @@ -41,6 +41,7 @@ struct PackedDataMeta { class XNNWeightsCache { public: XNNWeightsCache(); + ~XNNWeightsCache(); /** * Initializes the XNNWeightsCache for the next xnn_create_runtime @@ -115,6 +116,13 @@ class XNNWeightsCache { */ Error delete_packed_data(const std::vector& packed_names); + /** + * Set the path for the file-backed packed weight storage. + * When set, reserve_space() allocates from a MAP_SHARED file instead + * of heap, and finalize_for_runtime() schedules an async msync flush. + */ + void set_packed_cache_path(const std::string& path); + private: // Runtime Allocator used to reserve memory for packed weights MemoryAllocator* runtime_allocator_; @@ -137,6 +145,20 @@ class XNNWeightsCache { // whether or not the weight cache is finalized bool is_finalized_; + // File-backed mmap for packed weights. When packed_cache_path_ is set, + // reserve_space() allocates from this mmap'd file instead of heap. + // After msync, pages become clean file-backed → 0 phys_footprint. + // Not used on _WIN32 builds, which always allocate from the heap. + std::string packed_cache_path_; + int packed_file_fd_{-1}; + size_t packed_file_used_{0}; + struct MmapRegion { + void* addr; + size_t size; + }; + std::vector mmap_regions_; + size_t mmap_regions_synced_{0}; + // Function pointers to override XNNPACK's default xnn_weights_cache_provider // functions. 
static size_t look_up( diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp index aa5f6f0302b..ffaba9508d8 100644 --- a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp +++ b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp @@ -37,6 +37,12 @@ Error XnnpackBackendOptions::get_option(BackendOption& option) const { option.value = static_cast(sharing_mode_.load()); } else if (strcmp(option.key, weight_cache_option_key) == 0) { option.value = weight_cache_enabled_.load(); + } else if (strcmp(option.key, packed_cache_path_option_key) == 0) { + std::array arr{}; + size_t len = + std::min(packed_cache_path_.size(), runtime::kMaxOptionValueLength - 1); + memcpy(arr.data(), packed_cache_path_.data(), len); + option.value = arr; } return Error::Ok; } @@ -66,6 +72,18 @@ Error XnnpackBackendOptions::set_option(const BackendOption& option) { } ET_LOG(Debug, "Setting XNNPACK weight cache enabled to %d.", *val); weight_cache_enabled_.store(*val); + } else if (strcmp(option.key, packed_cache_path_option_key) == 0) { + auto* val = std::get_if>( + &option.value); + if (!val) { + ET_LOG(Error, "XNNPACK packed cache path must be a string."); + return Error::InvalidArgument; + } + packed_cache_path_ = std::string(val->data()); + ET_LOG( + Debug, + "Setting XNNPACK packed cache path to %s.", + packed_cache_path_.c_str()); } return Error::Ok; } @@ -108,4 +126,12 @@ const XNNWorkspaceManager& XnnpackBackendOptions::workspace_manager() const { return workspace_manager_; } +const std::string& XnnpackBackendOptions::get_packed_cache_path() const { + return packed_cache_path_; +} + +void XnnpackBackendOptions::set_packed_cache_path(const std::string& path) { + packed_cache_path_ = path; +} + } // namespace executorch::backends::xnnpack diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.h b/backends/xnnpack/runtime/XnnpackBackendOptions.h index ab6c93c21a3..aed037ac835 100644 --- 
a/backends/xnnpack/runtime/XnnpackBackendOptions.h +++ b/backends/xnnpack/runtime/XnnpackBackendOptions.h @@ -41,6 +41,9 @@ class XnnpackBackendOptions { XNNWorkspaceManager& workspace_manager(); const XNNWorkspaceManager& workspace_manager() const; + const std::string& get_packed_cache_path() const; + void set_packed_cache_path(const std::string& path); + private: XNNWorkspaceManager workspace_manager_; @@ -56,6 +59,8 @@ class XnnpackBackendOptions { #else std::atomic weight_cache_enabled_{false}; #endif + + std::string packed_cache_path_; }; } // namespace executorch::backends::xnnpack