Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backends/xnnpack/runtime/XNNPACKBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ class XnnpackBackend final
weights_cache_mutex_, std::defer_lock);
if (use_weight_cache) {
lock_weights_cache.lock();

const auto& cache_path = options_.get_packed_cache_path();
if (!cache_path.empty()) {
weights_cache_->set_packed_cache_path(cache_path);
}

weights_cache_->initialize_for_runtime(
context.get_runtime_allocator(), named_data_map);
workspace->set_uses_weight_cache();
Expand Down
4 changes: 4 additions & 0 deletions backends/xnnpack/runtime/XNNPACKBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ const char workspace_sharing_mode_option_key[] = "workspace_sharing_mode";
// across delegate instances. Changes only affect subsequently loaded models.
const char weight_cache_option_key[] = "weight_cache_enabled";

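/// Path of the file that backs packed weights. When set (non-Windows builds
/// only), reserve_space() maps regions of this file with MAP_SHARED instead of
/// allocating from the heap, falling back to the heap if the file cannot be
/// opened, grown, or mapped. The file is created or truncated when it is
/// opened. After msync the mapped pages are clean, which is intended to lower
/// the memory footprint on iOS.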
const char packed_cache_path_option_key[] = "packed_cache_path";

/// Workspace sharing mode. This is a backend option that can be set via the
/// set_option API to control memory sharing between CALL_DELEGATE instances.
/// This is useful for reducing memory consumption.
Expand Down
128 changes: 108 additions & 20 deletions backends/xnnpack/runtime/XNNWeightsCache.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
Expand All @@ -9,7 +9,13 @@
#include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/memory_allocator.h>
#ifndef _WIN32
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cerrno>
#endif
#include <xnnpack.h>
#include <exception>
#include <memory>
Expand All @@ -27,18 +33,33 @@

// Wires this object into the weights-cache provider struct held in
// weights_cache_: the object itself is stored as the callback context, and
// every provider slot (look_up, reserve_space, look_up_or_insert,
// is_finalized, offset_to_addr, delete_cache) is pointed at the static member
// function of the same name, cast to the void*-context signature of the slot.
// NOTE(review): the look_up, look_up_or_insert and delete_cache casts each
// appear twice below, differing only in whitespace inside the cast. This looks
// like the removed and added sides of a formatting-only diff rendered
// together; confirm against the real file before relying on this text.
XNNWeightsCache::XNNWeightsCache() {
weights_cache_.context = this;
weights_cache_.look_up = (size_t(*)(
weights_cache_.look_up = (size_t (*)(
void*, const xnn_weights_cache_look_up_key*))XNNWeightsCache::look_up;
weights_cache_.reserve_space =
(void* (*)(void*, size_t))XNNWeightsCache::reserve_space;
weights_cache_.look_up_or_insert =
(size_t(*)(void*, const xnn_weights_cache_look_up_key*, void*, size_t))
(size_t (*)(void*, const xnn_weights_cache_look_up_key*, void*, size_t))
XNNWeightsCache::look_up_or_insert;
weights_cache_.is_finalized = (bool (*)(void*))XNNWeightsCache::is_finalized;
weights_cache_.offset_to_addr =
(void* (*)(void*, size_t))XNNWeightsCache::offset_to_addr;
weights_cache_.delete_cache =
(enum xnn_status(*)(void*))XNNWeightsCache::delete_cache;
(enum xnn_status (*)(void*))XNNWeightsCache::delete_cache;
}

// Tears down the file-backed packed weight storage. On non-Windows builds
// every recorded mapping is unmapped, the region list is emptied, and the
// packed weight file descriptor is closed if one is open. On Windows this
// destructor does nothing.
XNNWeightsCache::~XNNWeightsCache() {
#ifndef _WIN32
  for (size_t idx = 0; idx < mmap_regions_.size(); ++idx) {
    const auto& mapping = mmap_regions_[idx];
    // Skip entries that do not describe a live mapping.
    if (mapping.addr == nullptr || mapping.addr == MAP_FAILED) {
      continue;
    }
    munmap(mapping.addr, mapping.size);
  }
  mmap_regions_.clear();

  const bool file_is_open = packed_file_fd_ >= 0;
  if (file_is_open) {
    close(packed_file_fd_);
    // Mark the descriptor as closed.
    packed_file_fd_ = -1;
  }
#endif
}

Error XNNWeightsCache::initialize_for_runtime(
Expand All @@ -48,6 +69,25 @@
named_data_map_ = named_data_map;
is_finalized_ = false;

#ifndef _WIN32
// Open the file for packed weights. Each reserve_space() call
// independently mmaps a region of the file.
if (!packed_cache_path_.empty() && packed_file_fd_ < 0) {
packed_file_fd_ =
open(packed_cache_path_.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0644);
if (packed_file_fd_ < 0) {
ET_LOG(
Error,
"Failed to open packed weight file: %s (errno=%d)",
packed_cache_path_.c_str(),
errno);
} else {
packed_file_used_ = 0;
ET_LOG(Info, "Opened packed weight file: %s", packed_cache_path_.c_str());
}
}
#endif

return Error::Ok;
}

Expand All @@ -73,6 +113,24 @@
}
}

#ifndef _WIN32
// Schedule async flush for newly added regions only.
// MS_ASYNC returns immediately; OS flushes in the background.
if (mmap_regions_.size() > mmap_regions_synced_) {
size_t new_count = mmap_regions_.size() - mmap_regions_synced_;
for (size_t i = mmap_regions_synced_; i < mmap_regions_.size(); ++i) {
msync(mmap_regions_[i].addr, mmap_regions_[i].size, MS_ASYNC);
}
mmap_regions_synced_ = mmap_regions_.size();
ET_LOG(
Info,
"Scheduled async flush: %zu new regions (%zu total), %zu MB packed weights",
new_count,
mmap_regions_.size(),
packed_file_used_ / (1024 * 1024));
}
#endif

return packed_data_names;
}

Expand Down Expand Up @@ -111,12 +169,11 @@
entry->second.ref_count--;
if (entry->second.ref_count == 0) {
void* packed_data_ptr = packed_data_ptrs_[entry->second.offset];
// Erase the key/value from the map frees the pointer holding the packed
// data
packed_pointer_to_container_.erase(packed_data_ptr);
// remove the pointer from the packed_data_ptrs_
// Only free heap-allocated packed data, not file-backed mmap'd data
if (packed_pointer_to_container_.count(packed_data_ptr) > 0) {
packed_pointer_to_container_.erase(packed_data_ptr);
}
packed_data_ptrs_[entry->second.offset] = nullptr;
// Erase the name to packed metadata entry
name_to_packed_data_metadata_.erase(entry->first);
}
}
Expand Down Expand Up @@ -158,17 +215,47 @@
return packed_weight_entry->second.offset;
}

/**
* Reserve space in the weight cache for n bytes of weight data, aligned to
* context->kPackedAllocationAlignment. This function will return nullptr if
* the allocation fails.
*/
void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
// MemoryAllocator* allocator = context->runtime_allocator_;
// void* reserved_pointer = allocator->allocate(n,
// context->kPackedAllocationAlignment);
#ifndef _WIN32
if (context->packed_file_fd_ >= 0) {
size_t page_size = sysconf(_SC_PAGESIZE);
size_t file_offset =
(context->packed_file_used_ + page_size - 1) & ~(page_size - 1);
size_t map_size = (n + page_size - 1) & ~(page_size - 1);

if (ftruncate(context->packed_file_fd_, file_offset + map_size) != 0) {
ET_LOG(
Error,
"ftruncate to %zu failed (errno=%d)",
file_offset + map_size,
errno);
close(context->packed_file_fd_);
context->packed_file_fd_ = -1;
goto heap_fallback;
}

void* ptr = mmap(
nullptr,
map_size,
PROT_READ | PROT_WRITE,
MAP_SHARED,
context->packed_file_fd_,
file_offset);
if (ptr == MAP_FAILED) {
ET_LOG(Error, "mmap %zu bytes failed (errno=%d)", map_size, errno);
close(context->packed_file_fd_);
context->packed_file_fd_ = -1;
goto heap_fallback;
}

context->packed_file_used_ = file_offset + map_size;
context->mmap_regions_.push_back({ptr, map_size});
return ptr;
}

heap_fallback:
#endif

// return reserved_pointer;
try {
std::string data_container;
size_t raw_allocation_size = n + context->kPackedAllocationAlignment - 1;
Expand All @@ -187,9 +274,6 @@
std::move(data_container);
return aligned_space;
} catch (std::bad_alloc& e) {
// XNNPACK can gracefully handle allocation failures, so return nullptr.
// We want to be able to recover from a failed attempt to load a large
// model without a crash.
ET_LOG(
Error,
"XNN weight cache failed to allocate %zu bytes: %s.",
Expand Down Expand Up @@ -267,6 +351,10 @@
return xnn_status_success;
}

// Records the path of the file used for file-backed packed weights. This only
// stores the string; the file itself is opened later, by
// initialize_for_runtime(), and only while no packed weight file is open yet.
void XNNWeightsCache::set_packed_cache_path(const std::string& path) {
  packed_cache_path_.assign(path);
}

} // namespace delegate
} // namespace xnnpack
} // namespace backends
Expand Down
22 changes: 22 additions & 0 deletions backends/xnnpack/runtime/XNNWeightsCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ struct PackedDataMeta {
class XNNWeightsCache {
public:
XNNWeightsCache();
~XNNWeightsCache();

/**
* Initializes the XNNWeightsCache for the next xnn_create_runtime
Expand Down Expand Up @@ -115,6 +116,13 @@ class XNNWeightsCache {
*/
Error delete_packed_data(const std::vector<std::string>& packed_names);

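/**
 * Set the path of the file used for file-backed packed weight storage.
 * When a path is set and the file can be opened, reserve_space() maps
 * regions of that file with MAP_SHARED instead of allocating from the heap,
 * and finalize_for_runtime() schedules an asynchronous msync (MS_ASYNC) of
 * the newly mapped regions so their pages can become clean. The path takes
 * effect the next time initialize_for_runtime() opens the file.
 */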
void set_packed_cache_path(const std::string& path);

private:
// Runtime Allocator used to reserve memory for packed weights
MemoryAllocator* runtime_allocator_;
Expand All @@ -137,6 +145,20 @@ class XNNWeightsCache {
// whether or not the weight cache is finalized
bool is_finalized_;

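// File-backed storage for packed weights. When packed_cache_path_ is set and
// the file is open, each reserve_space() call maps its own page-aligned
// region of the file (MAP_SHARED) instead of allocating from the heap.
// After msync, the pages are clean and file-backed, so they are expected to
// contribute 0 to phys_footprint.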
std::string packed_cache_path_;
int packed_file_fd_{-1};
size_t packed_file_used_{0};
// One file mapping created by reserve_space(): the address returned by mmap
// and the page-rounded number of bytes mapped. This is the pair later handed
// to msync and munmap. Members default to an empty mapping so that a
// default-constructed region never carries indeterminate values; aggregate
// initialization with {addr, size} keeps working.
struct MmapRegion {
  void* addr{nullptr};
  size_t size{0};
};
std::vector<MmapRegion> mmap_regions_;
size_t mmap_regions_synced_{0};

// Function pointers to override XNNPACK's default xnn_weights_cache_provider
// functions.
static size_t look_up(
Expand Down
26 changes: 26 additions & 0 deletions backends/xnnpack/runtime/XnnpackBackendOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ Error XnnpackBackendOptions::get_option(BackendOption& option) const {
option.value = static_cast<int>(sharing_mode_.load());
} else if (strcmp(option.key, weight_cache_option_key) == 0) {
option.value = weight_cache_enabled_.load();
} else if (strcmp(option.key, packed_cache_path_option_key) == 0) {
std::array<char, runtime::kMaxOptionValueLength> arr{};
size_t len =
std::min(packed_cache_path_.size(), runtime::kMaxOptionValueLength - 1);
memcpy(arr.data(), packed_cache_path_.data(), len);
option.value = arr;
}
return Error::Ok;
}
Expand Down Expand Up @@ -66,6 +72,18 @@ Error XnnpackBackendOptions::set_option(const BackendOption& option) {
}
ET_LOG(Debug, "Setting XNNPACK weight cache enabled to %d.", *val);
weight_cache_enabled_.store(*val);
} else if (strcmp(option.key, packed_cache_path_option_key) == 0) {
auto* val = std::get_if<std::array<char, runtime::kMaxOptionValueLength>>(
&option.value);
if (!val) {
ET_LOG(Error, "XNNPACK packed cache path must be a string.");
return Error::InvalidArgument;
}
packed_cache_path_ = std::string(val->data());
ET_LOG(
Debug,
"Setting XNNPACK packed cache path to %s.",
packed_cache_path_.c_str());
}
return Error::Ok;
}
Expand Down Expand Up @@ -108,4 +126,12 @@ const XNNWorkspaceManager& XnnpackBackendOptions::workspace_manager() const {
return workspace_manager_;
}

// Returns a reference to the stored packed cache path; an empty string means
// no path has been configured.
// NOTE(review): unlike the sharing-mode and weight-cache options, which are
// read through atomics, this member is a plain std::string. Confirm that
// callers serialize access with set_option()/set_packed_cache_path().
const std::string& XnnpackBackendOptions::get_packed_cache_path() const {
  const std::string& current_path = packed_cache_path_;
  return current_path;
}

void XnnpackBackendOptions::set_packed_cache_path(const std::string& path) {
packed_cache_path_ = path;
}

} // namespace executorch::backends::xnnpack
5 changes: 5 additions & 0 deletions backends/xnnpack/runtime/XnnpackBackendOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class XnnpackBackendOptions {
XNNWorkspaceManager& workspace_manager();
const XNNWorkspaceManager& workspace_manager() const;

const std::string& get_packed_cache_path() const;
void set_packed_cache_path(const std::string& path);

private:
XNNWorkspaceManager workspace_manager_;

Expand All @@ -56,6 +59,8 @@ class XnnpackBackendOptions {
#else
std::atomic<bool> weight_cache_enabled_{false};
#endif

std::string packed_cache_path_;
};

} // namespace executorch::backends::xnnpack
Loading