From 32330a41aeb3cc7f5d369e57e2733b19c5996239 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 18 Apr 2026 16:00:28 -0700 Subject: [PATCH] fix(uffd): register guest memory with WRITE_PROTECT in addition to MISSING `guest_memory_from_uffd` registered each guest memory region with `Uffd::register(...)`, which is the convenience wrapper that uses `UFFDIO_REGISTER_MODE_MISSING` only. As a result, any UFFD handler that asks the kernel to keep a copied page write-protected (by passing `UFFDIO_COPY_MODE_WP`) gets a synchronous EINVAL on the very first read fault, because the destination range was never registered with `UFFDIO_REGISTER_MODE_WP`. WRITE_PROTECT registration is what enables the standard CoW snapshot pattern: the handler serves a missing page via UFFDIO_COPY with MODE_WP, the kernel re-faults on the next write to that page, and the handler observes (and can record) which pages got dirtied after restore. Without WP registration this pattern cannot work: the first UFFDIO_COPY with MODE_WP fails with EINVAL, which breaks the resume path. Switch `register(...)` to `register_with_mode(..., MISSING | WRITE_PROTECT)`. `RegisterMode::WRITE_PROTECT` lives behind the `linux5_7` feature of the `userfaultfd` crate (UFFDIO_WRITEPROTECT was added in Linux 5.7), so also enable that feature on the dependency in `src/vmm/Cargo.toml`. Firecracker's minimum supported kernel is well past 5.7. This is a strict superset of the previous registration: existing handlers that don't pass MODE_WP behave identically. 
Signed-off-by: ValentaTomas --- src/vmm/Cargo.toml | 2 +- src/vmm/src/persist.rs | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 485d1598a66..d1b09ffce00 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -46,7 +46,7 @@ serde = { version = "1.0.228", features = ["derive", "rc"] } serde_json = "1.0.149" slab = "0.4.12" thiserror = "2.0.18" -userfaultfd = "0.9.0" +userfaultfd = { version = "0.9.0", features = ["linux5_7"] } utils = { path = "../utils" } uuid = "1.23.0" vhost = { version = "0.15.0", features = ["vhost-user-frontend"] } diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 2d7ccc5e5d7..e4a6f4a61ee 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -14,7 +14,7 @@ use std::sync::{Arc, Mutex}; use semver::Version; use serde::{Deserialize, Serialize}; -use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; +use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder}; use vmm_sys_util::sock_ctrl_msg::ScmSocket; #[cfg(target_arch = "aarch64")] @@ -553,9 +553,23 @@ fn guest_memory_from_uffd( .create() .map_err(GuestMemoryFromUffdError::Create)?; + // Register every region for both MISSING and WRITE_PROTECT faults. + // + // MISSING is needed so the orchestrator's UFFD handler is woken up the first time the guest + // touches a page that has not yet been populated from the snapshot's memory file. + // + // WRITE_PROTECT is needed so the handler can keep pages it serves in a write-protected state + // (via UFFDIO_COPY_MODE_WP) and observe subsequent writes as new faults — the standard CoW + // tracking pattern that lets the orchestrator know which pages got dirtied after restore. + // Without WRITE_PROTECT registration, UFFDIO_COPY with MODE_WP fails synchronously with + // EINVAL on the very first read fault, breaking the snapshot resume path. 
for mem_region in guest_memory.iter() { - uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _) - .map_err(GuestMemoryFromUffdError::Register)?; + uffd.register_with_mode( + mem_region.as_ptr().cast(), + mem_region.size() as _, + RegisterMode::MISSING | RegisterMode::WRITE_PROTECT, + ) + .map_err(GuestMemoryFromUffdError::Register)?; } send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?;