From b7b2c2efab55cb2f05206a6465ceef4418ed84e6 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:53:25 -0700 Subject: [PATCH 1/5] fix: zero-fill NOBITS sections in ELF loader Some linkers emit PT_LOAD segments where filesz == memsz but contain .bss sections whose VMA range overlaps with file bytes from unrelated sections. The loader copies the full segment verbatim, leaving .bss with stale data instead of zeros. Collect NOBITS section ranges (excluding .tbss) during ELF parsing and zero-fill them after loading PT_LOAD segments. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Co-authored-by: danbugs --- src/hyperlight_host/src/mem/elf.rs | 37 ++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/hyperlight_host/src/mem/elf.rs b/src/hyperlight_host/src/mem/elf.rs index 16e506eac..7d3d0fb96 100644 --- a/src/hyperlight_host/src/mem/elf.rs +++ b/src/hyperlight_host/src/mem/elf.rs @@ -21,10 +21,7 @@ use std::sync::Arc; use goblin::elf::reloc::{R_AARCH64_NONE, R_AARCH64_RELATIVE}; #[cfg(target_arch = "x86_64")] use goblin::elf::reloc::{R_X86_64_NONE, R_X86_64_RELATIVE}; -use goblin::elf::{Elf, ProgramHeaders, Reloc}; -#[cfg(feature = "nanvix-unstable")] -use goblin::elf32::program_header::PT_LOAD; -#[cfg(not(feature = "nanvix-unstable"))] +use goblin::elf::{Elf, ProgramHeaders, Reloc, section_header}; use goblin::elf64::program_header::PT_LOAD; use super::exe::LoadInfo; @@ -45,6 +42,8 @@ pub(crate) struct ElfInfo { shdrs: Vec, entry: u64, relocs: Vec, + /// (addr, size) of NOBITS sections that need zero-filling (excludes .tbss). + nobits_ranges: Vec<(u64, u64)>, /// The hyperlight version string embedded by `hyperlight-guest-bin`, if /// present. Used to detect version/ABI mismatches between guest and host. guest_bin_version: Option, @@ -128,6 +127,20 @@ impl ElfInfo { // hyperlight-guest-bin. 
let guest_bin_version = Self::read_version_note(&elf, bytes); + // Collect NOBITS sections (e.g. .bss) that need zero-filling. + // Skip .tbss (SHF_TLS) since thread-local BSS is allocated per-thread. + let nobits_ranges: Vec<(u64, u64)> = { + elf.section_headers + .iter() + .filter(|sh| { + sh.sh_type == section_header::SHT_NOBITS + && sh.sh_size > 0 + && (sh.sh_flags & u64::from(section_header::SHF_TLS)) == 0 + }) + .map(|sh| (sh.sh_addr, sh.sh_size)) + .collect() + }; + Ok(ElfInfo { payload: bytes.to_vec(), phdrs: elf.program_headers, @@ -146,6 +159,7 @@ impl ElfInfo { .collect(), entry: elf.entry, relocs, + nobits_ranges, guest_bin_version, }) } @@ -206,6 +220,21 @@ impl ElfInfo { .copy_from_slice(&self.payload[payload_offset..payload_offset + payload_len]); target[start_va + payload_len..start_va + phdr.p_memsz as usize].fill(0); } + // Zero-fill NOBITS sections (e.g. .bss) that were not already + // covered by the filesz < memsz zeroing above. + for &(addr, size) in &self.nobits_ranges { + let sh_start = (addr - base_va) as usize; + let sh_end = sh_start + size as usize; + if sh_end <= target.len() { + target[sh_start..sh_end].fill(0); + } else { + tracing::warn!( + "NOBITS section at VA {:#x} (size {:#x}) extends past loaded image, skipping zero-fill", + addr, + size + ); + } + } let get_addend = |name, r: &Reloc| { r.r_addend .ok_or_else(|| new_error!("{} missing addend", name)) From ed50d48fe4dfdb8c669e3681eb023c85c3d8252f Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:18:39 -0700 Subject: [PATCH 2/5] refactor: remove PEB file_mappings and related code Remove the nanvix-unstable-gated file_mappings field from HyperlightPEB and all host-side code that wrote to it: - write_file_mapping_entry in mgr.rs - PEB layout calculations (array sizing, heap offset, getter methods) - PEB file_mapping writes in write_peb and map_file_cow - 3 PEB test functions (multiuse, deferred, multiple_entries) - 
evolve-time write_file_mapping_entry call Embedders that need file mapping metadata can pass it through init_data instead of the PEB struct. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Co-authored-by: danbugs --- src/hyperlight_common/src/mem.rs | 7 - src/hyperlight_host/src/mem/layout.rs | 89 +----- src/hyperlight_host/src/mem/mgr.rs | 63 ---- .../src/sandbox/file_mapping.rs | 2 +- .../src/sandbox/initialized_multi_use.rs | 287 +----------------- .../src/sandbox/uninitialized_evolve.rs | 9 - 6 files changed, 4 insertions(+), 453 deletions(-) diff --git a/src/hyperlight_common/src/mem.rs b/src/hyperlight_common/src/mem.rs index fb850acc8..d577c7954 100644 --- a/src/hyperlight_common/src/mem.rs +++ b/src/hyperlight_common/src/mem.rs @@ -72,11 +72,4 @@ pub struct HyperlightPEB { pub output_stack: GuestMemoryRegion, pub init_data: GuestMemoryRegion, pub guest_heap: GuestMemoryRegion, - /// File mappings array descriptor. - /// **Note:** `size` holds the **entry count** (number of valid - /// [`FileMappingInfo`] entries), NOT a byte size. `ptr` holds the - /// guest address of the preallocated array (immediately after the - /// PEB struct). 
- #[cfg(feature = "nanvix-unstable")] - pub file_mappings: GuestMemoryRegion, } diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index b55189969..2177848c8 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -227,8 +227,6 @@ pub(crate) struct SandboxMemoryLayout { peb_output_data_offset: usize, peb_init_data_offset: usize, peb_heap_data_offset: usize, - #[cfg(feature = "nanvix-unstable")] - peb_file_mappings_offset: usize, guest_heap_buffer_offset: usize, init_data_offset: usize, @@ -281,11 +279,6 @@ impl Debug for SandboxMemoryLayout { "Guest Heap Offset", &format_args!("{:#x}", self.peb_heap_data_offset), ); - #[cfg(feature = "nanvix-unstable")] - ff.field( - "File Mappings Offset", - &format_args!("{:#x}", self.peb_file_mappings_offset), - ); ff.field( "Guest Heap Buffer Offset", &format_args!("{:#x}", self.guest_heap_buffer_offset), @@ -353,29 +346,11 @@ impl SandboxMemoryLayout { let peb_output_data_offset = peb_offset + offset_of!(HyperlightPEB, output_stack); let peb_init_data_offset = peb_offset + offset_of!(HyperlightPEB, init_data); let peb_heap_data_offset = peb_offset + offset_of!(HyperlightPEB, guest_heap); - #[cfg(feature = "nanvix-unstable")] - let peb_file_mappings_offset = peb_offset + offset_of!(HyperlightPEB, file_mappings); // The following offsets are the actual values that relate to memory layout, // which are written to PEB struct let peb_address = Self::BASE_ADDRESS + peb_offset; // make sure heap buffer starts at 4K boundary. - // The FileMappingInfo array is stored immediately after the PEB struct. - // We statically reserve space for MAX_FILE_MAPPINGS entries so that - // the heap never overlaps the array, even when all slots are used. - // The host writes file mapping metadata here via write_file_mapping_entry; - // the guest only reads the entries. 
We don't know at layout time how - // many file mappings the host will register, so we reserve space for - // the maximum number. - // The heap starts at the next page boundary after this reserved area. - #[cfg(feature = "nanvix-unstable")] - let file_mappings_array_end = peb_offset - + size_of::() - + hyperlight_common::mem::MAX_FILE_MAPPINGS - * size_of::(); - #[cfg(feature = "nanvix-unstable")] - let guest_heap_buffer_offset = file_mappings_array_end.next_multiple_of(PAGE_SIZE_USIZE); - #[cfg(not(feature = "nanvix-unstable"))] let guest_heap_buffer_offset = (peb_offset + size_of::()).next_multiple_of(PAGE_SIZE_USIZE); @@ -389,8 +364,6 @@ impl SandboxMemoryLayout { peb_output_data_offset, peb_init_data_offset, peb_heap_data_offset, - #[cfg(feature = "nanvix-unstable")] - peb_file_mappings_offset, sandbox_memory_config: cfg, code_size, guest_heap_buffer_offset, @@ -514,32 +487,6 @@ impl SandboxMemoryLayout { self.peb_heap_data_offset } - /// Get the offset in guest memory to the file_mappings count field - /// (the `size` field of the `GuestMemoryRegion` in the PEB). - #[cfg(feature = "nanvix-unstable")] - pub(crate) fn get_file_mappings_size_offset(&self) -> usize { - self.peb_file_mappings_offset - } - - /// Get the offset in guest memory to the file_mappings pointer field. - #[cfg(feature = "nanvix-unstable")] - fn get_file_mappings_pointer_offset(&self) -> usize { - self.get_file_mappings_size_offset() + size_of::() - } - - /// Get the offset in snapshot memory where the FileMappingInfo array starts - /// (immediately after the PEB struct, within the same page). - #[cfg(feature = "nanvix-unstable")] - pub(crate) fn get_file_mappings_array_offset(&self) -> usize { - self.peb_offset + size_of::() - } - - /// Get the guest address of the FileMappingInfo array. 
- #[cfg(feature = "nanvix-unstable")] - fn get_file_mappings_array_gva(&self) -> u64 { - (Self::BASE_ADDRESS + self.get_file_mappings_array_offset()) as u64 - } - /// Get the offset of the heap pointer in guest memory, #[instrument(skip_all, parent = Span::current(), level= "Trace")] fn get_heap_pointer_offset(&self) -> usize { @@ -643,19 +590,7 @@ impl SandboxMemoryLayout { )); } - // PEB + preallocated FileMappingInfo array - #[cfg(feature = "nanvix-unstable")] - let heap_offset = { - let peb_and_array_size = size_of::() - + hyperlight_common::mem::MAX_FILE_MAPPINGS - * size_of::(); - builder.push_page_aligned( - peb_and_array_size, - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, - Peb, - ) - }; - #[cfg(not(feature = "nanvix-unstable"))] + // PEB let heap_offset = builder.push_page_aligned(size_of::(), MemoryRegionFlags::READ, Peb); @@ -796,21 +731,6 @@ impl SandboxMemoryLayout { write_u64(mem, self.get_heap_size_offset(), self.heap_size.try_into()?)?; write_u64(mem, self.get_heap_pointer_offset(), addr)?; - // Set up the file_mappings descriptor in the PEB. - // - The `size` field holds the number of valid FileMappingInfo - // entries currently written (initially 0 — entries are added - // later by map_file_cow / evolve). 
- // - The `ptr` field holds the guest address of the preallocated - // FileMappingInfo array - #[cfg(feature = "nanvix-unstable")] - write_u64(mem, self.get_file_mappings_size_offset(), 0)?; - #[cfg(feature = "nanvix-unstable")] - write_u64( - mem, - self.get_file_mappings_pointer_offset(), - self.get_file_mappings_array_gva(), - )?; - // End of setting up the PEB // The input and output data regions do not have their layout @@ -865,12 +785,7 @@ mod tests { // in order of layout expected_size += layout.code_size; - // PEB + preallocated FileMappingInfo array - #[cfg(feature = "nanvix-unstable")] - let peb_and_array = size_of::() - + hyperlight_common::mem::MAX_FILE_MAPPINGS - * size_of::(); - #[cfg(not(feature = "nanvix-unstable"))] + // PEB let peb_and_array = size_of::(); expected_size += peb_and_array.next_multiple_of(PAGE_SIZE_USIZE); diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 98c70734b..dddab493a 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -13,8 +13,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#[cfg(feature = "nanvix-unstable")] -use std::mem::offset_of; use flatbuffers::FlatBufferBuilder; use hyperlight_common::flatbuffer_wrappers::function_call::{ @@ -339,67 +337,6 @@ impl SandboxMemoryManager { } impl SandboxMemoryManager { - /// Write a [`FileMappingInfo`] entry into the PEB's preallocated array. - /// - /// Reads the current entry count from the PEB, validates that the - /// array isn't full ([`MAX_FILE_MAPPINGS`]), writes the entry at the - /// next available slot, and increments the count. - /// - /// This is the **only** place that writes to the PEB file mappings - /// array — both `MultiUseSandbox::map_file_cow` and the evolve loop - /// call through here so the logic is not duplicated. 
- /// - /// # Errors - /// - /// Returns an error if [`MAX_FILE_MAPPINGS`] has been reached. - /// - /// [`FileMappingInfo`]: hyperlight_common::mem::FileMappingInfo - /// [`MAX_FILE_MAPPINGS`]: hyperlight_common::mem::MAX_FILE_MAPPINGS - #[cfg(feature = "nanvix-unstable")] - pub(crate) fn write_file_mapping_entry( - &mut self, - guest_addr: u64, - size: u64, - label: &[u8; hyperlight_common::mem::FILE_MAPPING_LABEL_MAX_LEN + 1], - ) -> Result<()> { - use hyperlight_common::mem::{FileMappingInfo, MAX_FILE_MAPPINGS}; - - // Read the current entry count from the PEB. This is the source - // of truth — it survives snapshot/restore because the PEB is - // part of shared memory that gets snapshotted. - let current_count = - self.shared_mem - .read::(self.layout.get_file_mappings_size_offset())? as usize; - - if current_count >= MAX_FILE_MAPPINGS { - return Err(crate::new_error!( - "file mapping limit reached ({} of {})", - current_count, - MAX_FILE_MAPPINGS, - )); - } - - // Write the entry into the next available slot. - let entry_offset = self.layout.get_file_mappings_array_offset() - + current_count * std::mem::size_of::(); - let guest_addr_offset = offset_of!(FileMappingInfo, guest_addr); - let size_offset = offset_of!(FileMappingInfo, size); - let label_offset = offset_of!(FileMappingInfo, label); - self.shared_mem - .write::(entry_offset + guest_addr_offset, guest_addr)?; - self.shared_mem - .write::(entry_offset + size_offset, size)?; - self.shared_mem - .copy_from_slice(label, entry_offset + label_offset)?; - - // Increment the entry count. 
- let new_count = (current_count + 1) as u64; - self.shared_mem - .write::(self.layout.get_file_mappings_size_offset(), new_count)?; - - Ok(()) - } - /// Reads a host function call from memory #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_host_function_call(&mut self) -> Result { diff --git a/src/hyperlight_host/src/sandbox/file_mapping.rs b/src/hyperlight_host/src/sandbox/file_mapping.rs index 1ebe1f262..9b26ae879 100644 --- a/src/hyperlight_host/src/sandbox/file_mapping.rs +++ b/src/hyperlight_host/src/sandbox/file_mapping.rs @@ -58,7 +58,7 @@ pub(crate) struct PreparedFileMapping { /// The page-aligned size of the mapping in bytes. pub(crate) size: usize, /// Null-terminated C-style label for this mapping (max 63 chars + null). - #[cfg_attr(not(feature = "nanvix-unstable"), allow(unused))] + #[allow(unused)] pub(crate) label: [u8; hyperlight_common::mem::FILE_MAPPING_LABEL_MAX_LEN + 1], /// Host-side OS resources. `None` after successful consumption /// by the apply step (ownership transferred to the VM layer). diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 72de96035..bbc4806f0 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -560,23 +560,6 @@ impl MultiUseSandbox { return Err(crate::HyperlightError::PoisonedSandbox); } - // Pre-check the file mapping limit before doing any expensive - // OS or VM work. The PEB count is the source of truth. - #[cfg(feature = "nanvix-unstable")] - let current_count = self - .mem_mgr - .shared_mem - .read::(self.mem_mgr.layout.get_file_mappings_size_offset())? 
- as usize; - #[cfg(feature = "nanvix-unstable")] - if current_count >= hyperlight_common::mem::MAX_FILE_MAPPINGS { - return Err(crate::HyperlightError::Error(format!( - "map_file_cow: file mapping limit reached ({} of {})", - current_count, - hyperlight_common::mem::MAX_FILE_MAPPINGS, - ))); - } - // Phase 1: host-side OS work (open file, create mapping) let mut prepared = prepare_file_cow(file_path, guest_base, label)?; @@ -636,14 +619,6 @@ impl MultiUseSandbox { prepared.mark_consumed(); self.mem_mgr.mapped_rgns += 1; - // Record the mapping metadata in the PEB. If this fails the VM - // still holds a valid mapping but the PEB won't list it — the - // limit was already pre-checked above so this should not fail - // in practice. - #[cfg(feature = "nanvix-unstable")] - self.mem_mgr - .write_file_mapping_entry(prepared.guest_base, size, &prepared.label)?; - Ok(size) } @@ -2134,267 +2109,7 @@ mod tests { let _ = std::fs::remove_file(&path); } - /// Tests that `map_file_cow` on a MultiUseSandbox correctly writes - /// the FileMappingInfo entry (count, guest_addr, size, label) into - /// the PEB. - #[test] - #[cfg(feature = "nanvix-unstable")] - fn test_map_file_cow_peb_entry_multiuse() { - use std::mem::offset_of; - - use hyperlight_common::mem::{FILE_MAPPING_LABEL_MAX_LEN, FileMappingInfo}; - - let (path, _) = create_test_file("hyperlight_test_peb_entry_multiuse.bin", &[0xDD; 4096]); - - let guest_base: u64 = 0x1_0000_0000; - let label = "my_ramfs"; - - let mut sbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap() - .evolve() - .unwrap(); - - // Map with an explicit label. - let mapped_size = sbox.map_file_cow(&path, guest_base, Some(label)).unwrap(); - - // Read back the PEB file_mappings count. 
- let count = sbox - .mem_mgr - .shared_mem - .read::(sbox.mem_mgr.layout.get_file_mappings_size_offset()) - .unwrap(); - assert_eq!( - count, 1, - "PEB file_mappings count should be 1 after one mapping" - ); - - // Read back the first FileMappingInfo entry. - let entry_offset = sbox.mem_mgr.layout.get_file_mappings_array_offset(); - - let stored_addr = sbox - .mem_mgr - .shared_mem - .read::(entry_offset + offset_of!(FileMappingInfo, guest_addr)) - .unwrap(); - assert_eq!(stored_addr, guest_base, "PEB entry guest_addr should match"); - - let stored_size = sbox - .mem_mgr - .shared_mem - .read::(entry_offset + offset_of!(FileMappingInfo, size)) - .unwrap(); - assert_eq!( - stored_size, mapped_size, - "PEB entry size should match mapped_size" - ); - - // Read back the label bytes and verify. - let label_offset = entry_offset + offset_of!(FileMappingInfo, label); - let mut label_buf = [0u8; FILE_MAPPING_LABEL_MAX_LEN + 1]; - for (i, byte) in label_buf.iter_mut().enumerate() { - *byte = sbox - .mem_mgr - .shared_mem - .read::(label_offset + i) - .unwrap(); - } - let label_len = label_buf - .iter() - .position(|&b| b == 0) - .unwrap_or(label_buf.len()); - let stored_label = std::str::from_utf8(&label_buf[..label_len]).unwrap(); - assert_eq!(stored_label, label, "PEB entry label should match"); - - let _ = std::fs::remove_file(&path); - } - - /// Tests that deferred `map_file_cow` (before evolve) correctly - /// writes FileMappingInfo entries into the PEB during evolve. 
- #[test] - #[cfg(feature = "nanvix-unstable")] - fn test_map_file_cow_peb_entry_deferred() { - use std::mem::offset_of; - - use hyperlight_common::mem::{FILE_MAPPING_LABEL_MAX_LEN, FileMappingInfo}; - - let (path, _) = create_test_file("hyperlight_test_peb_entry_deferred.bin", &[0xEE; 4096]); - - let guest_base: u64 = 0x1_0000_0000; - let label = "deferred_fs"; - - let mut u_sbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - let mapped_size = u_sbox.map_file_cow(&path, guest_base, Some(label)).unwrap(); - - // Evolve — PEB entries should be written during this step. - let sbox: MultiUseSandbox = u_sbox.evolve().unwrap(); - - // Read back count. - let count = sbox - .mem_mgr - .shared_mem - .read::(sbox.mem_mgr.layout.get_file_mappings_size_offset()) - .unwrap(); - assert_eq!(count, 1, "PEB file_mappings count should be 1 after evolve"); - - // Read back the entry. - let entry_offset = sbox.mem_mgr.layout.get_file_mappings_array_offset(); - - let stored_addr = sbox - .mem_mgr - .shared_mem - .read::(entry_offset + offset_of!(FileMappingInfo, guest_addr)) - .unwrap(); - assert_eq!(stored_addr, guest_base); - - let stored_size = sbox - .mem_mgr - .shared_mem - .read::(entry_offset + offset_of!(FileMappingInfo, size)) - .unwrap(); - assert_eq!(stored_size, mapped_size); - - // Verify the label. 
- let label_offset = entry_offset + offset_of!(FileMappingInfo, label); - let mut label_buf = [0u8; FILE_MAPPING_LABEL_MAX_LEN + 1]; - for (i, byte) in label_buf.iter_mut().enumerate() { - *byte = sbox - .mem_mgr - .shared_mem - .read::(label_offset + i) - .unwrap(); - } - let label_len = label_buf - .iter() - .position(|&b| b == 0) - .unwrap_or(label_buf.len()); - let stored_label = std::str::from_utf8(&label_buf[..label_len]).unwrap(); - assert_eq!( - stored_label, label, - "PEB entry label should match after evolve" - ); - - let _ = std::fs::remove_file(&path); - } - - /// Tests that mapping 5 files (3 deferred + 2 post-evolve) correctly - /// populates all PEB FileMappingInfo slots with the right guest_addr, - /// size, and label for each entry. - #[test] - #[cfg(feature = "nanvix-unstable")] - fn test_map_file_cow_peb_multiple_entries() { - use std::mem::{offset_of, size_of}; - - use hyperlight_common::mem::{FILE_MAPPING_LABEL_MAX_LEN, FileMappingInfo}; - - const NUM_FILES: usize = 5; - const DEFERRED_COUNT: usize = 3; - - // Create 5 test files with distinct content. - let mut paths = Vec::new(); - let mut labels: Vec = Vec::new(); - for i in 0..NUM_FILES { - let name = format!("hyperlight_test_peb_multi_{}.bin", i); - let content = vec![i as u8 + 0xA0; 4096]; - let (path, _) = create_test_file(&name, &content); - paths.push(path); - labels.push(format!("file_{}", i)); - } - - // Each file gets a unique guest base, spaced 1 page apart - // (well outside the shared memory region). - let page_size = page_size::get() as u64; - let base: u64 = 0x1_0000_0000; - let guest_bases: Vec = (0..NUM_FILES as u64) - .map(|i| base + i * page_size) - .collect(); - - let mut u_sbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - // Map 3 files before evolve (deferred path). 
- let mut mapped_sizes = Vec::new(); - for i in 0..DEFERRED_COUNT { - let size = u_sbox - .map_file_cow(&paths[i], guest_bases[i], Some(&labels[i])) - .unwrap(); - mapped_sizes.push(size); - } - - // Evolve — deferred mappings applied + PEB entries written. - let mut sbox: MultiUseSandbox = u_sbox.evolve().unwrap(); - - // Map 2 more files post-evolve (MultiUseSandbox path). - for i in DEFERRED_COUNT..NUM_FILES { - let size = sbox - .map_file_cow(&paths[i], guest_bases[i], Some(&labels[i])) - .unwrap(); - mapped_sizes.push(size); - } - - // Verify PEB count equals 5. - let count = sbox - .mem_mgr - .shared_mem - .read::(sbox.mem_mgr.layout.get_file_mappings_size_offset()) - .unwrap(); - assert_eq!( - count, NUM_FILES as u64, - "PEB should have {NUM_FILES} entries" - ); - - // Verify each entry's guest_addr, size, and label. - let array_base = sbox.mem_mgr.layout.get_file_mappings_array_offset(); - for i in 0..NUM_FILES { - let entry_offset = array_base + i * size_of::(); - - let stored_addr = sbox - .mem_mgr - .shared_mem - .read::(entry_offset + offset_of!(FileMappingInfo, guest_addr)) - .unwrap(); - assert_eq!( - stored_addr, guest_bases[i], - "Entry {i}: guest_addr mismatch" - ); - - let stored_size = sbox - .mem_mgr - .shared_mem - .read::(entry_offset + offset_of!(FileMappingInfo, size)) - .unwrap(); - assert_eq!(stored_size, mapped_sizes[i], "Entry {i}: size mismatch"); - - // Read and verify the label. - let label_base = entry_offset + offset_of!(FileMappingInfo, label); - let mut label_buf = [0u8; FILE_MAPPING_LABEL_MAX_LEN + 1]; - for (j, byte) in label_buf.iter_mut().enumerate() { - *byte = sbox.mem_mgr.shared_mem.read::(label_base + j).unwrap(); - } - let label_len = label_buf - .iter() - .position(|&b| b == 0) - .unwrap_or(label_buf.len()); - let stored_label = std::str::from_utf8(&label_buf[..label_len]).unwrap(); - assert_eq!(stored_label, labels[i], "Entry {i}: label mismatch"); - } - - // Clean up. 
for path in &paths { - let _ = std::fs::remove_file(path); - } - } - + /// Tests that an explicitly provided label exceeding 63 bytes is rejected. #[test] fn test_map_file_cow_label_too_long() { diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 428594d37..37eb933e2 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -107,15 +107,6 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result
diff --git a/Justfile b/Justfile index 8b0578c7f..aed410dcc 100644 --- a/Justfile +++ b/Justfile @@ -268,7 +268,7 @@ test-rust-tracing target=default-target features="": check-i686 target=default-target: cargo check -p hyperlight-common --target i686-unknown-linux-gnu --profile={{ if target == "debug" { "dev" } else { target } }} cargo check -p hyperlight-guest --target i686-unknown-linux-gnu --profile={{ if target == "debug" { "dev" } else { target } }} - cargo check -p hyperlight-common --target i686-unknown-linux-gnu --features nanvix-unstable --profile={{ if target == "debug" { "dev" } else { target } }} + cargo check -p hyperlight-common --target i686-unknown-linux-gnu --features i686-guest --profile={{ if target == "debug" { "dev" } else { target } }} # Verify that trace_guest correctly fails on i686 (compile_error should trigger) ! cargo check -p hyperlight-guest --target i686-unknown-linux-gnu --features trace_guest --profile={{ if target == "debug" { "dev" } else { target } }} 2>/dev/null @@ -291,8 +291,8 @@ check: {{ cargo-cmd }} check -p hyperlight-host --features print_debug {{ target-triple-flag }} {{ cargo-cmd }} check -p hyperlight-host --features gdb {{ target-triple-flag }} {{ cargo-cmd }} check -p hyperlight-host --features trace_guest,mem_profile {{ target-triple-flag }} - {{ cargo-cmd }} check -p hyperlight-host --features nanvix-unstable {{ target-triple-flag }} - {{ cargo-cmd }} check -p hyperlight-host --features nanvix-unstable,executable_heap {{ target-triple-flag }} + {{ cargo-cmd }} check -p hyperlight-host --features i686-guest {{ target-triple-flag }} + {{ cargo-cmd }} check -p hyperlight-host --features i686-guest,executable_heap {{ target-triple-flag }} {{ cargo-cmd }} check -p hyperlight-host --features hw-interrupts {{ target-triple-flag }} fmt-check: (ensure-nightly-fmt) diff --git a/src/hyperlight_common/Cargo.toml b/src/hyperlight_common/Cargo.toml index 68ebcae71..32e7c0ce3 100644 --- a/src/hyperlight_common/Cargo.toml +++ 
b/src/hyperlight_common/Cargo.toml @@ -31,7 +31,8 @@ fuzzing = ["dep:arbitrary"] trace_guest = [] mem_profile = [] std = ["thiserror/std", "log/std", "tracing/std"] -nanvix-unstable = [] +i686-guest = [] +guest-counter = [] [lib] bench = false # see https://bheisler.github.io/criterion.rs/book/faq.html#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options diff --git a/src/hyperlight_common/src/layout.rs b/src/hyperlight_common/src/layout.rs index 64b79d982..cbed0f1d6 100644 --- a/src/hyperlight_common/src/layout.rs +++ b/src/hyperlight_common/src/layout.rs @@ -16,11 +16,11 @@ limitations under the License. #[cfg_attr(target_arch = "x86", path = "arch/i686/layout.rs")] #[cfg_attr( - all(target_arch = "x86_64", not(feature = "nanvix-unstable")), + all(target_arch = "x86_64", not(feature = "i686-guest")), path = "arch/amd64/layout.rs" )] #[cfg_attr( - all(target_arch = "x86_64", feature = "nanvix-unstable"), + all(target_arch = "x86_64", feature = "i686-guest"), path = "arch/i686/layout.rs" )] #[cfg_attr(target_arch = "aarch64", path = "arch/aarch64/layout.rs")] @@ -28,7 +28,7 @@ mod arch; pub use arch::{MAX_GPA, MAX_GVA}; #[cfg(any( - all(target_arch = "x86_64", not(feature = "nanvix-unstable")), + all(target_arch = "x86_64", not(feature = "i686-guest")), target_arch = "aarch64" ))] pub use arch::{SNAPSHOT_PT_GVA_MAX, SNAPSHOT_PT_GVA_MIN}; @@ -45,7 +45,7 @@ pub const SCRATCH_TOP_EXN_STACK_OFFSET: u64 = 0x20; /// counter falls in scratch page 0xffffe000 instead of the very last page /// 0xfffff000, which on i686 guests would require frame 0xfffff — exceeding the /// maximum representable frame number. 
-#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] pub const SCRATCH_TOP_GUEST_COUNTER_OFFSET: u64 = 0x1008; pub fn scratch_base_gpa(size: usize) -> u64 { diff --git a/src/hyperlight_guest/Cargo.toml b/src/hyperlight_guest/Cargo.toml index 3c985d1dc..d9de514ae 100644 --- a/src/hyperlight_guest/Cargo.toml +++ b/src/hyperlight_guest/Cargo.toml @@ -24,4 +24,5 @@ hyperlight-guest-tracing = { workspace = true, default-features = false, optiona [features] default = [] trace_guest = ["dep:hyperlight-guest-tracing", "hyperlight-guest-tracing?/trace"] -nanvix-unstable = ["hyperlight-common/nanvix-unstable"] +i686-guest = ["hyperlight-common/i686-guest"] +guest-counter = ["hyperlight-common/guest-counter"] diff --git a/src/hyperlight_guest/src/layout.rs b/src/hyperlight_guest/src/layout.rs index c1f5839c0..74d03feed 100644 --- a/src/hyperlight_guest/src/layout.rs +++ b/src/hyperlight_guest/src/layout.rs @@ -35,7 +35,7 @@ pub fn snapshot_pt_gpa_base_gva() -> *mut u64 { pub use arch::{scratch_base_gpa, scratch_base_gva}; /// Returns a pointer to the guest counter u64 in scratch memory. 
-#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] pub fn guest_counter_gva() -> *const u64 { use hyperlight_common::layout::{MAX_GVA, SCRATCH_TOP_GUEST_COUNTER_OFFSET}; (MAX_GVA as u64 - SCRATCH_TOP_GUEST_COUNTER_OFFSET + 1) as *const u64 diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 97fac0b6c..494216179 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -137,7 +137,8 @@ hw-interrupts = [] gdb = ["dep:gdbstub", "dep:gdbstub_arch"] fuzzing = ["hyperlight-common/fuzzing"] build-metadata = ["dep:built"] -nanvix-unstable = ["hyperlight-common/nanvix-unstable"] +i686-guest = ["hyperlight-common/i686-guest"] +guest-counter = ["hyperlight-common/guest-counter"] [[bench]] name = "benchmarks" diff --git a/src/hyperlight_host/build.rs b/src/hyperlight_host/build.rs index 6f3f9587a..2e98d8f65 100644 --- a/src/hyperlight_host/build.rs +++ b/src/hyperlight_host/build.rs @@ -105,10 +105,8 @@ fn main() -> Result<()> { crashdump: { all(feature = "crashdump", target_arch = "x86_64") }, // print_debug feature is aliased with debug_assertions to make it only available in debug-builds. print_debug: { all(feature = "print_debug", debug_assertions) }, - // the nanvix-unstable and gdb features both (only - // temporarily!) need to use writable/un-shared snapshot - // memories, and so can't share - unshared_snapshot_mem: { any(feature = "nanvix-unstable", feature = "gdb") }, + // gdb needs writable snapshot memory for debug access. 
+ unshared_snapshot_mem: { feature = "gdb" }, } #[cfg(feature = "build-metadata")] diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index 698ab49e5..6d1e1601d 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -100,10 +100,10 @@ impl HyperlightVm { None => return Err(CreateHyperlightVmError::NoHypervisorFound), }; - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] vm.set_sregs(&CommonSpecialRegisters::standard_64bit_defaults(_pml4_addr)) .map_err(VmError::Register)?; - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "i686-guest")] vm.set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults()) .map_err(VmError::Register)?; @@ -251,15 +251,15 @@ impl HyperlightVm { /// Get the current base page table physical address. /// /// By default, reads CR3 from the vCPU special registers. - /// With `nanvix-unstable`, returns 0 (identity-mapped, no page tables). + /// With `i686-guest`, returns 0 (identity-mapped, no page tables). 
pub(crate) fn get_root_pt(&self) -> Result { - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] { let sregs = self.vm.sregs()?; // Mask off the flags bits Ok(sregs.cr3 & !0xfff_u64) } - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "i686-guest")] { Ok(0) } @@ -352,7 +352,7 @@ impl HyperlightVm { self.vm.set_debug_regs(&CommonDebugRegs::default())?; self.vm.reset_xsave()?; - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] { // Restore the full special registers from snapshot, but update CR3 // to point to the new (relocated) page tables @@ -361,11 +361,11 @@ impl HyperlightVm { self.pending_tlb_flush = true; self.vm.set_sregs(&sregs)?; } - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "i686-guest")] { let _ = (cr3, sregs); // suppress unused warnings // TODO: This is probably not correct. - // Let's deal with it when we clean up the nanvix-unstable feature + // Let's deal with it when we clean up the i686-guest feature self.vm .set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; } @@ -874,7 +874,7 @@ pub(super) mod debug { } #[cfg(test)] -#[cfg(not(feature = "nanvix-unstable"))] +#[cfg(not(feature = "i686-guest"))] #[allow(clippy::needless_range_loop)] mod tests { use std::sync::{Arc, Mutex}; diff --git a/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs b/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs index e51f32bf2..7193d4f50 100644 --- a/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs +++ b/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs @@ -28,7 +28,7 @@ use windows::Win32::System::Hypervisor::*; use super::FromWhpRegisterError; cfg_if::cfg_if! 
{ - if #[cfg(not(feature = "nanvix-unstable"))] { + if #[cfg(not(feature = "i686-guest"))] { pub(crate) const CR4_PAE: u64 = 1 << 5; pub(crate) const CR4_OSFXSR: u64 = 1 << 9; pub(crate) const CR4_OSXMMEXCPT: u64 = 1 << 10; @@ -69,7 +69,7 @@ pub(crate) struct CommonSpecialRegisters { } impl CommonSpecialRegisters { - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] pub(crate) fn standard_64bit_defaults(pml4_addr: u64) -> Self { CommonSpecialRegisters { cs: CommonSegmentRegister { @@ -104,7 +104,7 @@ impl CommonSpecialRegisters { } } - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "i686-guest")] pub(crate) fn standard_real_mode_defaults() -> Self { CommonSpecialRegisters { cs: CommonSegmentRegister { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index c29754e6c..db68dfdd0 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -36,7 +36,7 @@ use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, }; -#[cfg(all(test, not(feature = "nanvix-unstable")))] +#[cfg(all(test, not(feature = "i686-guest")))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; #[cfg(feature = "hw-interrupts")] use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; @@ -446,7 +446,7 @@ impl VirtualMachine for KvmVm { } #[cfg(test)] - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError> { if std::mem::size_of_val(xsave) != XSAVE_BUFFER_SIZE { return Err(RegisterError::XsaveSizeMismatch { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index 9edaa1f87..ecb19a09f 100644 --- 
a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -112,7 +112,7 @@ pub(crate) const XSAVE_MIN_SIZE: usize = 576; /// Standard XSAVE buffer size (4KB) used by KVM and MSHV. /// WHP queries the required size dynamically. -#[cfg(all(any(kvm, mshv3), test, not(feature = "nanvix-unstable")))] +#[cfg(all(any(kvm, mshv3), test, not(feature = "i686-guest")))] pub(crate) const XSAVE_BUFFER_SIZE: usize = 4096; // Compiler error if no hypervisor type is available (not applicable on aarch64 yet) @@ -350,7 +350,7 @@ pub(crate) trait VirtualMachine: Debug + Send { fn reset_xsave(&self) -> std::result::Result<(), RegisterError>; /// Set xsave - only used for tests #[cfg(test)] - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError>; /// Get partition handle diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 0a768bd7a..27f024ca6 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -51,7 +51,7 @@ use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, }; -#[cfg(all(test, not(feature = "nanvix-unstable")))] +#[cfg(all(test, not(feature = "i686-guest")))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; #[cfg(feature = "hw-interrupts")] use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; @@ -445,7 +445,7 @@ impl VirtualMachine for MshvVm { } #[cfg(test)] - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError> { if std::mem::size_of_val(xsave) != XSAVE_BUFFER_SIZE { return 
Err(RegisterError::XsaveSizeMismatch { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 3c6ae5a9d..3cc5cc4f2 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -746,7 +746,7 @@ impl VirtualMachine for WhpVm { } #[cfg(test)] - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError> { // Get the required buffer size by calling with NULL buffer. // If the buffer is not large enough (0 won't be), WHvGetVirtualProcessorXsaveState returns diff --git a/src/hyperlight_host/src/lib.rs b/src/hyperlight_host/src/lib.rs index 928f82cd2..311c0e989 100644 --- a/src/hyperlight_host/src/lib.rs +++ b/src/hyperlight_host/src/lib.rs @@ -95,7 +95,7 @@ pub use sandbox::UninitializedSandbox; /// The re-export for the `GuestBinary` type pub use sandbox::uninitialized::GuestBinary; /// The re-export for the `GuestCounter` type -#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] pub use sandbox::uninitialized::GuestCounter; /// The universal `Result` type used throughout the Hyperlight codebase. diff --git a/src/hyperlight_host/src/mem/exe.rs b/src/hyperlight_host/src/mem/exe.rs index c201592f2..97874ae6e 100644 --- a/src/hyperlight_host/src/mem/exe.rs +++ b/src/hyperlight_host/src/mem/exe.rs @@ -88,6 +88,12 @@ impl ExeInfo { ExeInfo::Elf(elf) => Offset::from(elf.entrypoint_va()), } } + /// Returns the base virtual address of the loaded binary (lowest PT_LOAD p_vaddr). 
+ pub fn base_va(&self) -> u64 { + match self { + ExeInfo::Elf(elf) => elf.get_base_va(), + } + } pub fn loaded_size(&self) -> usize { match self { ExeInfo::Elf(elf) => elf.get_va_size(), diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 2177848c8..fd8677392 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -140,7 +140,7 @@ impl<'a> ResolvedGpa<&'a [u8], &'a [u8]> { } } #[cfg(any(gdb, feature = "mem_profile"))] -#[allow(unused)] // may be unused when nanvix-unstable is also enabled +#[allow(unused)] // may be unused when i686-guest is also enabled pub(crate) trait ReadableSharedMemory { fn copy_to_slice(&self, slice: &mut [u8], offset: usize) -> Result<()>; } @@ -178,7 +178,7 @@ impl ReadableSharedMemory for T { } #[cfg(any(gdb, feature = "mem_profile"))] impl ResolvedGpa { - #[allow(unused)] // may be unused when nanvix-unstable is also enabled + #[allow(unused)] // may be unused when i686-guest is also enabled pub(crate) fn copy_to_slice(&self, slice: &mut [u8]) -> Result<()> { match &self.base { BaseGpaRegion::Snapshot(sn) => sn.copy_to_slice(slice, self.offset), @@ -237,7 +237,7 @@ pub(crate) struct SandboxMemoryLayout { code_size: usize, // The offset in the sandbox memory where the code starts guest_code_offset: usize, - #[cfg_attr(feature = "nanvix-unstable", allow(unused))] + #[cfg_attr(feature = "i686-guest", allow(unused))] pub(crate) init_data_permissions: Option, // The size of the scratch region in physical memory; note that @@ -309,10 +309,7 @@ impl SandboxMemoryLayout { const MAX_MEMORY_SIZE: usize = (16 * 1024 * 1024 * 1024) - Self::BASE_ADDRESS; // 16 GiB - BASE_ADDRESS /// The base address of the sandbox's memory. 
- #[cfg(not(feature = "nanvix-unstable"))] pub(crate) const BASE_ADDRESS: usize = 0x1000; - #[cfg(feature = "nanvix-unstable")] - pub(crate) const BASE_ADDRESS: usize = 0x0; // the offset into a sandbox's input/output buffer where the stack starts pub(crate) const STACK_POINTER_SIZE_BYTES: u64 = 8; @@ -566,7 +563,7 @@ impl SandboxMemoryLayout { /// Returns the memory regions associated with this memory layout, /// suitable for passing to a hypervisor for mapping into memory - #[cfg_attr(feature = "nanvix-unstable", allow(unused))] + #[cfg_attr(feature = "i686-guest", allow(unused))] pub(crate) fn get_memory_regions_( &self, host_base: K::HostBaseType, diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 979b260dd..615fe9cac 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -276,7 +276,7 @@ impl MemoryRegionKind for HostGuestMemoryRegion { /// Type for memory regions that only track guest addresses. /// -#[cfg_attr(feature = "nanvix-unstable", allow(dead_code))] +#[cfg_attr(feature = "i686-guest", allow(dead_code))] #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] pub(crate) struct GuestMemoryRegion {} @@ -329,7 +329,7 @@ impl MemoryRegionKind for CrashDumpMemoryRegion { #[cfg(crashdump)] pub(crate) type CrashDumpRegion = MemoryRegion_; -#[cfg(all(crashdump, feature = "nanvix-unstable"))] +#[cfg(all(crashdump, feature = "i686-guest"))] impl HostGuestMemoryRegion { /// Extract the raw `usize` host address from the platform-specific /// host base type. 
@@ -349,7 +349,7 @@ impl HostGuestMemoryRegion { } } -#[cfg_attr(feature = "nanvix-unstable", allow(unused))] +#[cfg_attr(feature = "i686-guest", allow(unused))] pub(crate) struct MemoryRegionVecBuilder { guest_base_phys_addr: usize, host_base_virt_addr: K::HostBaseType, diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index dddab493a..4aa494b5a 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -21,7 +21,7 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{ use hyperlight_common::flatbuffer_wrappers::function_types::FunctionCallResult; use hyperlight_common::flatbuffer_wrappers::guest_log_data::GuestLogData; use hyperlight_common::vmem::{self, PAGE_TABLE_SIZE, PageTableEntry, PhysAddr}; -#[cfg(all(feature = "crashdump", not(feature = "nanvix-unstable")))] +#[cfg(all(feature = "crashdump", not(feature = "i686-guest")))] use hyperlight_common::vmem::{BasicMapping, MappingKind}; use tracing::{Span, instrument}; @@ -36,7 +36,7 @@ use crate::mem::memory_region::{CrashDumpRegion, MemoryRegionFlags, MemoryRegion use crate::sandbox::snapshot::{NextAction, Snapshot}; use crate::{Result, new_error}; -#[cfg(all(feature = "crashdump", not(feature = "nanvix-unstable")))] +#[cfg(all(feature = "crashdump", not(feature = "i686-guest")))] fn mapping_kind_to_flags(kind: &MappingKind) -> (MemoryRegionFlags, MemoryRegionType) { match kind { MappingKind::Basic(BasicMapping { @@ -74,7 +74,7 @@ fn mapping_kind_to_flags(kind: &MappingKind) -> (MemoryRegionFlags, MemoryRegion /// in both guest and host address space and has the same flags. /// /// Returns `true` if the region was coalesced, `false` if a new region is needed. 
-#[cfg(all(feature = "crashdump", not(feature = "nanvix-unstable")))] +#[cfg(all(feature = "crashdump", not(feature = "i686-guest")))] fn try_coalesce_region( regions: &mut [CrashDumpRegion], virt_base: usize, @@ -99,7 +99,7 @@ fn try_coalesce_region( // fact that the snapshot shared memory is `ReadonlySharedMemory` // normally, but there is (temporary) support for writable // `GuestSharedMemory` with `#[cfg(feature = -// "nanvix-unstable")]`. Unfortunately, rustc gets annoyed about an +// "i686-guest")]`. Unfortunately, rustc gets annoyed about an // unused type parameter, unless one goes to a little bit of effort to // trick it... mod unused_hack { @@ -516,7 +516,7 @@ impl SandboxMemoryManager { /// /// By default, walks the guest page tables to discover /// GVA→GPA mappings and translates them to host-backed regions. - #[cfg(all(feature = "crashdump", not(feature = "nanvix-unstable")))] + #[cfg(all(feature = "crashdump", not(feature = "i686-guest")))] pub(crate) fn get_guest_memory_regions( &mut self, root_pt: u64, @@ -578,7 +578,7 @@ impl SandboxMemoryManager { /// Without paging, GVA == GPA (identity mapped), so we return the /// snapshot and scratch regions directly at their known addresses /// alongside any dynamic mmap regions. 
- #[cfg(all(feature = "crashdump", feature = "nanvix-unstable"))] + #[cfg(all(feature = "crashdump", feature = "i686-guest"))] pub(crate) fn get_guest_memory_regions( &mut self, _root_pt: u64, @@ -733,7 +733,7 @@ impl SandboxMemoryManager { } #[cfg(test)] -#[cfg(all(not(feature = "nanvix-unstable"), target_arch = "x86_64"))] +#[cfg(all(not(feature = "i686-guest"), target_arch = "x86_64"))] mod tests { use hyperlight_common::vmem::{MappingKind, PAGE_TABLE_SIZE}; use hyperlight_testing::sandbox_sizes::{LARGE_HEAP_SIZE, MEDIUM_HEAP_SIZE, SMALL_HEAP_SIZE}; diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index b978b3475..d22dd6702 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -668,7 +668,7 @@ impl ExclusiveSharedMemory { /// Create a [`HostSharedMemory`] view of this region without /// consuming `self`. Used in tests where the full `build()` / /// `evolve()` pipeline is not available. 
- #[cfg(all(test, feature = "nanvix-unstable"))] + #[cfg(all(test, feature = "guest-counter"))] pub(crate) fn as_host_shared_memory(&self) -> HostSharedMemory { let lock = Arc::new(RwLock::new(())); HostSharedMemory { diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index c5f0520a6..3bf44ff97 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -257,7 +257,7 @@ fn filtered_mappings<'a>( return None; } // neither does the mapping of the snapshot's own page tables - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] if mapping.virt_base >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 && mapping.virt_base <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 { @@ -342,7 +342,7 @@ impl Snapshot { let guest_blob_size = blob.as_ref().map(|b| b.data.len()).unwrap_or(0); let guest_blob_mem_flags = blob.as_ref().map(|b| b.permissions); - #[cfg_attr(feature = "nanvix-unstable", allow(unused_mut))] + #[cfg_attr(feature = "i686-guest", allow(unused_mut))] let mut layout = crate::mem::layout::SandboxMemoryLayout::new( cfg, exe_info.loaded_size(), @@ -351,7 +351,8 @@ impl Snapshot { )?; let load_addr = layout.get_guest_code_address() as u64; - let entrypoint_offset: u64 = exe_info.entrypoint().into(); + let base_va = exe_info.base_va(); + let entrypoint_va: u64 = exe_info.entrypoint().into(); let mut memory = vec![0; layout.get_memory_size()?]; @@ -365,7 +366,7 @@ impl Snapshot { blob.map(|x| layout.write_init_data(&mut memory, x.data)) .transpose()?; - #[cfg(not(feature = "nanvix-unstable"))] + #[cfg(not(feature = "i686-guest"))] { // Set up page table entries for the snapshot let pt_buf = GuestPageTableBuffer::new(layout.get_pt_base_gpa() as usize); @@ -422,7 +423,7 @@ impl Snapshot { hash, stack_top_gva: exn_stack_top_gva, sregs: None, - entrypoint: NextAction::Initialise(load_addr + entrypoint_offset), + entrypoint: 
NextAction::Initialise(load_addr + entrypoint_va - base_va), }) } diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index e737d08da..23c01be28 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -31,7 +31,7 @@ use crate::func::{ParameterTuple, SupportedReturnType}; use crate::log_build_details; use crate::mem::memory_region::{DEFAULT_GUEST_BLOB_MEM_FLAGS, MemoryRegionFlags}; use crate::mem::mgr::SandboxMemoryManager; -#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] use crate::mem::shared_mem::HostSharedMemory; use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory}; use crate::sandbox::SandboxConfiguration; @@ -76,26 +76,26 @@ pub(crate) struct SandboxRuntimeConfig { /// /// Only one `GuestCounter` may be created per sandbox; a second call to /// [`UninitializedSandbox::guest_counter()`] returns an error. -#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] pub struct GuestCounter { inner: Mutex, } -#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] struct GuestCounterInner { deferred_hshm: Arc>>, offset: usize, value: u64, } -#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] impl core::fmt::Debug for GuestCounter { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("GuestCounter").finish_non_exhaustive() } } -#[cfg(feature = "nanvix-unstable")] +#[cfg(feature = "guest-counter")] impl GuestCounter { /// Increments the counter by one and writes it to guest memory. pub fn increment(&self) -> Result<()> { @@ -174,12 +174,12 @@ pub struct UninitializedSandbox { /// view of scratch memory. Code that needs host-style volatile access /// before `evolve()` (e.g. `GuestCounter`) can clone this `Arc` and /// will see `Some` once `evolve()` completes. 
- #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "guest-counter")] pub(crate) deferred_hshm: Arc>>, /// Set to `true` once a [`GuestCounter`] has been handed out via /// [`guest_counter()`](Self::guest_counter). Prevents creating /// multiple counters that would have divergent cached values. - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "guest-counter")] counter_taken: std::sync::atomic::AtomicBool, /// File mappings prepared by [`Self::map_file_cow`] that will be /// applied to the VM during [`Self::evolve`]. @@ -287,7 +287,7 @@ impl UninitializedSandbox { /// /// This method can only be called once; a second call returns an error /// because multiple counters would have divergent cached values. - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "guest-counter")] pub fn guest_counter(&mut self) -> Result { use std::sync::atomic::Ordering; @@ -376,9 +376,9 @@ impl UninitializedSandbox { rt_cfg, load_info: snapshot.load_info(), stack_top_gva: snapshot.stack_top_gva(), - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "guest-counter")] deferred_hshm: Arc::new(Mutex::new(None)), - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "guest-counter")] counter_taken: std::sync::atomic::AtomicBool::new(false), pending_file_mappings: Vec::new(), }; @@ -552,7 +552,7 @@ impl UninitializedSandbox { /// Populate the deferred `HostSharedMemory` slot without running /// the full `evolve()` pipeline. Used in tests where guest boot /// is not available. 
- #[cfg(all(test, feature = "nanvix-unstable"))] + #[cfg(all(test, feature = "guest-counter"))] fn simulate_build(&self) { let hshm = self.mgr.scratch_mem.as_host_shared_memory(); #[allow(clippy::unwrap_used)] @@ -1569,7 +1569,7 @@ mod tests { } } - #[cfg(feature = "nanvix-unstable")] + #[cfg(feature = "guest-counter")] mod guest_counter_tests { use hyperlight_testing::simple_guest_as_string; diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 37eb933e2..f396710b7 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -41,7 +41,7 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result Date: Tue, 14 Apr 2026 15:26:09 -0700 Subject: [PATCH 4/5] feat: i686 protected-mode boot and unified restore path Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Co-authored-by: danbugs --- .../src/hypervisor/hyperlight_vm/x86_64.rs | 47 ++++-------- .../hypervisor/regs/x86_64/special_regs.rs | 74 ++++++++++++------- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index 6d1e1601d..ce9730813 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -104,8 +104,10 @@ impl HyperlightVm { vm.set_sregs(&CommonSpecialRegisters::standard_64bit_defaults(_pml4_addr)) .map_err(VmError::Register)?; #[cfg(feature = "i686-guest")] - vm.set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults()) - .map_err(VmError::Register)?; + vm.set_sregs(&CommonSpecialRegisters::standard_32bit_paging_defaults( + _pml4_addr, + )) + .map_err(VmError::Register)?; #[cfg(any(kvm, mshv3))] let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { @@ -248,21 +250,11 @@ impl HyperlightVm { 
Ok(()) } - /// Get the current base page table physical address. - /// - /// By default, reads CR3 from the vCPU special registers. - /// With `i686-guest`, returns 0 (identity-mapped, no page tables). + /// Get the current base page table physical address from CR3. pub(crate) fn get_root_pt(&self) -> Result { - #[cfg(not(feature = "i686-guest"))] - { - let sregs = self.vm.sregs()?; - // Mask off the flags bits - Ok(sregs.cr3 & !0xfff_u64) - } - #[cfg(feature = "i686-guest")] - { - Ok(0) - } + let sregs = self.vm.sregs()?; + // Mask off the flags bits + Ok(sregs.cr3 & !0xfff_u64) } /// Get the special registers that need to be stored in a snapshot. @@ -352,23 +344,12 @@ impl HyperlightVm { self.vm.set_debug_regs(&CommonDebugRegs::default())?; self.vm.reset_xsave()?; - #[cfg(not(feature = "i686-guest"))] - { - // Restore the full special registers from snapshot, but update CR3 - // to point to the new (relocated) page tables - let mut sregs = *sregs; - sregs.cr3 = cr3; - self.pending_tlb_flush = true; - self.vm.set_sregs(&sregs)?; - } - #[cfg(feature = "i686-guest")] - { - let _ = (cr3, sregs); // suppress unused warnings - // TODO: This is probably not correct. 
- // Let's deal with it when we clean up the i686-guest feature - self.vm - .set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; - } + // Restore the full special registers from snapshot, but update CR3 + // to point to the new (relocated) page tables + let mut sregs = *sregs; + sregs.cr3 = cr3; + self.pending_tlb_flush = true; + self.vm.set_sregs(&sregs)?; Ok(()) } diff --git a/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs b/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs index 7193d4f50..edd4963af 100644 --- a/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs +++ b/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs @@ -104,36 +104,54 @@ impl CommonSpecialRegisters { } } + /// Returns special registers for 32-bit protected mode with paging enabled. + /// Used for i686 guests that need CoW page tables from boot. #[cfg(feature = "i686-guest")] - pub(crate) fn standard_real_mode_defaults() -> Self { + pub(crate) fn standard_32bit_paging_defaults(pd_addr: u64) -> Self { + // Flat 32-bit code segment: base=0, limit=4GB, 32-bit, executable + let code_seg = CommonSegmentRegister { + base: 0, + selector: 0x08, + limit: 0xFFFFFFFF, + type_: 11, // Execute/Read, Accessed + present: 1, + s: 1, + db: 1, // 32-bit + g: 1, // 4KB granularity + ..Default::default() + }; + // Flat 32-bit data segment: base=0, limit=4GB, 32-bit, writable + let data_seg = CommonSegmentRegister { + base: 0, + selector: 0x10, + limit: 0xFFFFFFFF, + type_: 3, // Read/Write, Accessed + present: 1, + s: 1, + db: 1, // 32-bit + g: 1, // 4KB granularity + ..Default::default() + }; + let tr_seg = CommonSegmentRegister { + base: 0, + selector: 0, + limit: 0xFFFF, + type_: 11, + present: 1, + s: 0, + ..Default::default() + }; CommonSpecialRegisters { - cs: CommonSegmentRegister { - base: 0, - selector: 0, - limit: 0xFFFF, - type_: 11, - present: 1, - s: 1, - ..Default::default() - }, - ds: CommonSegmentRegister { - base: 0, - selector: 
0, - limit: 0xFFFF, - type_: 3, - present: 1, - s: 1, - ..Default::default() - }, - tr: CommonSegmentRegister { - base: 0, - selector: 0, - limit: 0xFFFF, - type_: 11, - present: 1, - s: 0, - ..Default::default() - }, + cs: code_seg, + ds: data_seg, + es: data_seg, + ss: data_seg, + fs: data_seg, + gs: data_seg, + tr: tr_seg, + cr0: 0x80010011, // PE + ET + WP (write-protect for CoW) + PG + cr3: pd_addr, + cr4: 0, // No PAE, no PSE ..Default::default() } } From 9ce14bc1bd1f28bf9b0aa2690e834d031af24a14 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Tue, 14 Apr 2026 19:48:54 -0700 Subject: [PATCH 5/5] feat: i686 page tables, snapshot compaction, and CoW support Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Co-authored-by: danbugs --- src/hyperlight_common/src/arch/amd64/vmem.rs | 66 ++ src/hyperlight_common/src/arch/i686/layout.rs | 11 +- src/hyperlight_common/src/layout.rs | 12 + src/hyperlight_common/src/vmem.rs | 2 + src/hyperlight_host/src/mem/mgr.rs | 33 +- .../src/sandbox/initialized_multi_use.rs | 61 +- src/hyperlight_host/src/sandbox/snapshot.rs | 661 +++++++++++++++--- 7 files changed, 754 insertions(+), 92 deletions(-) diff --git a/src/hyperlight_common/src/arch/amd64/vmem.rs b/src/hyperlight_common/src/arch/amd64/vmem.rs index 9505dda16..a90314646 100644 --- a/src/hyperlight_common/src/arch/amd64/vmem.rs +++ b/src/hyperlight_common/src/arch/amd64/vmem.rs @@ -596,6 +596,72 @@ pub type PageTableEntry = u64; pub type VirtAddr = u64; pub type PhysAddr = u64; +/// i686 guest page-table walker and PTE constants for the x86_64 host. +/// +/// When the host builds with `i686-guest`, it needs to walk 2-level i686 +/// page tables in guest memory. The `arch/i686/vmem.rs` module only compiles +/// for `target_arch = "x86"` (the guest side), so the host-side walker lives +/// here, gated behind the feature flag. 
+#[cfg(feature = "i686-guest")] +pub mod i686_guest { + use alloc::vec::Vec; + + use crate::vmem::{BasicMapping, CowMapping, Mapping, MappingKind, TableReadOps}; + + pub const PAGE_PRESENT: u64 = 1; + pub const PAGE_RW: u64 = 1 << 1; + pub const PAGE_USER: u64 = 1 << 2; + pub const PAGE_ACCESSED: u64 = 1 << 5; + pub const PAGE_AVL_COW: u64 = 1 << 9; + pub const PTE_ADDR_MASK: u64 = 0xFFFFF000; + + /// Walk an i686 2-level page table and return all present mappings. + /// + /// # Safety + /// The caller must ensure that `op` provides valid page table memory. + pub unsafe fn virt_to_phys_all(op: &Op) -> Vec { + let root = op.root_table(); + let mut mappings = Vec::new(); + for pdi in 0..1024u64 { + let pde_ptr = Op::entry_addr(root, pdi * 4); + let pde: u64 = unsafe { op.read_entry(pde_ptr) }; + if (pde & PAGE_PRESENT) == 0 { + continue; + } + let pt_phys = pde & PTE_ADDR_MASK; + let pt_base = Op::from_phys(pt_phys as crate::vmem::PhysAddr); + for pti in 0..1024u64 { + let pte_ptr = Op::entry_addr(pt_base, pti * 4); + let pte: u64 = unsafe { op.read_entry(pte_ptr) }; + if (pte & PAGE_PRESENT) == 0 { + continue; + } + let phys_base = pte & PTE_ADDR_MASK; + let virt_base = (pdi << 22) | (pti << 12); + let kind = if (pte & PAGE_AVL_COW) != 0 { + MappingKind::Cow(CowMapping { + readable: true, + executable: true, + }) + } else { + MappingKind::Basic(BasicMapping { + readable: true, + writable: (pte & PAGE_RW) != 0, + executable: true, + }) + }; + mappings.push(Mapping { + phys_base, + virt_base, + len: super::PAGE_SIZE as u64, + kind, + }); + } + } + mappings + } +} + #[cfg(test)] mod tests { use alloc::vec; diff --git a/src/hyperlight_common/src/arch/i686/layout.rs b/src/hyperlight_common/src/arch/i686/layout.rs index f3601c643..85fbff91b 100644 --- a/src/hyperlight_common/src/arch/i686/layout.rs +++ b/src/hyperlight_common/src/arch/i686/layout.rs @@ -14,12 +14,15 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -// This file is just dummy definitions at the moment, in order to -// allow compiling the guest for real mode boot scenarios. +// i686 layout constants for 32-bit protected mode with paging. pub const MAX_GVA: usize = 0xffff_ffff; pub const MAX_GPA: usize = 0xffff_ffff; -pub fn min_scratch_size(_input_data_size: usize, _output_data_size: usize) -> usize { - crate::vmem::PAGE_SIZE +/// Minimum scratch region size: IO buffers (page-aligned) plus 12 pages +/// for bookkeeping and the exception stack. Page table space is validated +/// separately by `set_pt_size()`. +pub fn min_scratch_size(input_data_size: usize, output_data_size: usize) -> usize { + (input_data_size + output_data_size).next_multiple_of(crate::vmem::PAGE_SIZE) + + 12 * crate::vmem::PAGE_SIZE } diff --git a/src/hyperlight_common/src/layout.rs b/src/hyperlight_common/src/layout.rs index cbed0f1d6..3fe6bfc80 100644 --- a/src/hyperlight_common/src/layout.rs +++ b/src/hyperlight_common/src/layout.rs @@ -39,6 +39,18 @@ pub const SCRATCH_TOP_ALLOCATOR_OFFSET: u64 = 0x10; pub const SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET: u64 = 0x18; pub const SCRATCH_TOP_EXN_STACK_OFFSET: u64 = 0x20; +/// Offset from the top of scratch for the number of active page directory roots. +/// The guest writes this before signaling boot-complete so the host can walk +/// all active PDs during snapshot creation (not just CR3). +#[cfg(feature = "i686-guest")] +pub const SCRATCH_TOP_PD_ROOTS_COUNT_OFFSET: u64 = 0x28; +/// Offset from the top of scratch for the PD roots array (u32 GPAs on i686). +#[cfg(feature = "i686-guest")] +pub const SCRATCH_TOP_PD_ROOTS_ARRAY_OFFSET: u64 = 0x30; +/// Maximum number of PD roots the guest can expose to the host. +#[cfg(feature = "i686-guest")] +pub const MAX_PD_ROOTS: usize = 32; + /// Offset from the top of scratch memory for a shared host-guest u64 counter. 
/// /// This is placed at 0x1008 (rather than the next sequential 0x28) so that the diff --git a/src/hyperlight_common/src/vmem.rs b/src/hyperlight_common/src/vmem.rs index 96de9f334..4f3a5eed7 100644 --- a/src/hyperlight_common/src/vmem.rs +++ b/src/hyperlight_common/src/vmem.rs @@ -22,6 +22,8 @@ mod arch; /// This is always the page size that the /guest/ is being compiled /// for, which may or may not be the same as the host page size. pub use arch::PAGE_SIZE; +#[cfg(all(feature = "i686-guest", target_arch = "x86_64"))] +pub use arch::i686_guest; pub use arch::{PAGE_TABLE_SIZE, PageTableEntry, PhysAddr, VirtAddr}; pub const PAGE_TABLE_ENTRIES_PER_TABLE: usize = PAGE_TABLE_SIZE / core::mem::size_of::(); diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 4aa494b5a..b0074501b 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -20,6 +20,7 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{ }; use hyperlight_common::flatbuffer_wrappers::function_types::FunctionCallResult; use hyperlight_common::flatbuffer_wrappers::guest_log_data::GuestLogData; +#[cfg(not(feature = "i686-guest"))] use hyperlight_common::vmem::{self, PAGE_TABLE_SIZE, PageTableEntry, PhysAddr}; #[cfg(all(feature = "crashdump", not(feature = "i686-guest")))] use hyperlight_common::vmem::{BasicMapping, MappingKind}; @@ -148,11 +149,13 @@ pub(crate) struct SandboxMemoryManager { pub(crate) abort_buffer: Vec, } +#[cfg(not(feature = "i686-guest"))] pub(crate) struct GuestPageTableBuffer { buffer: std::cell::RefCell>, phys_base: usize, } +#[cfg(not(feature = "i686-guest"))] impl vmem::TableReadOps for GuestPageTableBuffer { type TableAddr = (usize, usize); // (table_index, entry_index) @@ -187,6 +190,7 @@ impl vmem::TableReadOps for GuestPageTableBuffer { (self.phys_base / PAGE_TABLE_SIZE, 0) } } +#[cfg(not(feature = "i686-guest"))] impl vmem::TableOps for GuestPageTableBuffer { type TableMovability = 
vmem::MayNotMoveTable; @@ -217,6 +221,7 @@ impl vmem::TableOps for GuestPageTableBuffer { } } +#[cfg(not(feature = "i686-guest"))] impl GuestPageTableBuffer { pub(crate) fn new(phys_base: usize) -> Self { GuestPageTableBuffer { @@ -268,7 +273,7 @@ where &mut self, sandbox_id: u64, mapped_regions: Vec, - root_pt_gpa: u64, + root_pt_gpas: &[u64], rsp_gva: u64, sregs: CommonSpecialRegisters, entrypoint: NextAction, @@ -280,7 +285,7 @@ where self.layout, crate::mem::exe::LoadInfo::dummy(), mapped_regions, - root_pt_gpa, + root_pt_gpas, rsp_gva, sregs, entrypoint, @@ -332,6 +337,7 @@ impl SandboxMemoryManager { abort_buffer: Vec::new(), // Guest doesn't need abort buffer }; host_mgr.update_scratch_bookkeeping()?; + host_mgr.copy_pt_to_scratch()?; Ok((host_mgr, guest_mgr)) } } @@ -461,6 +467,18 @@ impl SandboxMemoryManager { }; self.layout = *snapshot.layout(); self.update_scratch_bookkeeping()?; + // i686 snapshots store PT bytes separately (not appended to shared_mem) + // to avoid overlapping with map_file_cow regions. + // x86_64 snapshots have PTs appended to shared_mem. + #[cfg(feature = "i686-guest")] + { + let sep_pt = snapshot.separate_pt_bytes(); + self.scratch_mem.with_exclusivity(|scratch| { + scratch.copy_from_slice(sep_pt, self.layout.get_pt_base_scratch_offset()) + })??; + } + #[cfg(not(feature = "i686-guest"))] + self.copy_pt_to_scratch()?; Ok((gsnapshot, gscratch)) } @@ -479,6 +497,10 @@ impl SandboxMemoryManager { SCRATCH_TOP_ALLOCATOR_OFFSET, self.layout.get_first_free_scratch_gpa(), )?; + self.update_scratch_bookkeeping_item( + SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET, + self.layout.get_pt_base_gpa(), + )?; // Initialise the guest input and output data buffers in // scratch memory. TODO: remove the need for this. @@ -491,7 +513,11 @@ impl SandboxMemoryManager { SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES, )?; - // Copy the page tables into the scratch region + Ok(()) + } + + /// Copy page tables from shared_mem into the scratch region. 
+ fn copy_pt_to_scratch(&mut self) -> Result<()> { let snapshot_pt_end = self.shared_mem.mem_size(); let snapshot_pt_size = self.layout.get_pt_size(); let snapshot_pt_start = snapshot_pt_end - snapshot_pt_size; @@ -508,7 +534,6 @@ impl SandboxMemoryManager { #[allow(clippy::needless_borrow)] scratch.copy_from_slice(&bytes, self.layout.get_pt_base_scratch_offset()) })??; - Ok(()) } diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index bbc4806f0..fd089256c 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -160,10 +160,16 @@ impl MultiUseSandbox { } let mapped_regions_iter = self.vm.get_mapped_regions(); let mapped_regions_vec: Vec = mapped_regions_iter.cloned().collect(); - let root_pt_gpa = self + // Discover page table roots. For i686 guests, read the PD roots + // table from scratch bookkeeping. For x86_64, just use CR3. + #[cfg(feature = "i686-guest")] + let root_pt_gpas = self.read_pd_roots_from_scratch()?; + #[cfg(not(feature = "i686-guest"))] + let root_pt_gpas = [self .vm .get_root_pt() - .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; + .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?]; + let stack_top_gpa = self.vm.get_stack_top(); let sregs = self .vm @@ -173,7 +179,7 @@ impl MultiUseSandbox { let memory_snapshot = self.mem_mgr.snapshot( self.id, mapped_regions_vec, - root_pt_gpa, + &root_pt_gpas, stack_top_gpa, sregs, entrypoint, @@ -183,6 +189,54 @@ impl MultiUseSandbox { Ok(snapshot) } + /// Reads the PD roots table from the scratch bookkeeping area. + /// Returns an error if the guest did not write valid PD roots + /// before signaling boot-complete. 
+ #[cfg(feature = "i686-guest")] + fn read_pd_roots_from_scratch(&mut self) -> Result> { + use hyperlight_common::layout::{ + MAX_PD_ROOTS, SCRATCH_TOP_PD_ROOTS_ARRAY_OFFSET, SCRATCH_TOP_PD_ROOTS_COUNT_OFFSET, + }; + + let scratch_size = self.mem_mgr.layout.get_scratch_size(); + let count_off = scratch_size - SCRATCH_TOP_PD_ROOTS_COUNT_OFFSET as usize; + let array_off = scratch_size - SCRATCH_TOP_PD_ROOTS_ARRAY_OFFSET as usize; + + self.mem_mgr.scratch_mem.with_contents(|scratch| { + let count = scratch + .get(count_off..count_off + 4) + .map(|b| u32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .unwrap_or(0) as usize; + + if count == 0 { + return Err(crate::new_error!( + "i686 guest did not write PD roots to scratch bookkeeping (count=0)" + )); + } + if count > MAX_PD_ROOTS { + return Err(crate::new_error!( + "i686 guest wrote invalid PD roots count: {} (max {})", + count, + MAX_PD_ROOTS + )); + } + + let mut roots = Vec::with_capacity(count); + for i in 0..count { + let off = array_off + i * 4; + let b = scratch.get(off..off + 4).ok_or_else(|| { + crate::new_error!("PD root {} at offset {} is out of scratch bounds", i, off) + })?; + let gpa = u32::from_le_bytes([b[0], b[1], b[2], b[3]]); + if gpa == 0 { + return Err(crate::new_error!("PD root {} has GPA 0", i)); + } + roots.push(gpa as u64); + } + Ok(roots) + })? + } + /// Restores the sandbox's memory to a previously captured snapshot state. /// /// The snapshot must have been created from this same sandbox instance. @@ -2109,7 +2163,6 @@ mod tests { let _ = std::fs::remove_file(&path); } - /// Tests that an explicitly provided label exceeding 63 bytes is rejected. /// Tests that an explicitly provided label exceeding 63 bytes is rejected. 
#[test] fn test_map_file_cow_label_too_long() { diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index 3bf44ff97..879c31e27 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -16,8 +16,12 @@ limitations under the License. use std::sync::atomic::{AtomicU64, Ordering}; -use hyperlight_common::layout::{scratch_base_gpa, scratch_base_gva}; -use hyperlight_common::vmem::{self, BasicMapping, CowMapping, Mapping, MappingKind, PAGE_SIZE}; +#[cfg(not(feature = "i686-guest"))] +use hyperlight_common::layout::scratch_base_gpa; +use hyperlight_common::layout::scratch_base_gva; +#[cfg(not(feature = "i686-guest"))] +use hyperlight_common::vmem::{self, BasicMapping, CowMapping}; +use hyperlight_common::vmem::{Mapping, MappingKind, PAGE_SIZE}; use tracing::{Span, instrument}; use crate::HyperlightError::MemoryRegionSizeMismatch; @@ -26,7 +30,9 @@ use crate::hypervisor::regs::CommonSpecialRegisters; use crate::mem::exe::LoadInfo; use crate::mem::layout::SandboxMemoryLayout; use crate::mem::memory_region::MemoryRegion; -use crate::mem::mgr::{GuestPageTableBuffer, SnapshotSharedMemory}; +#[cfg(not(feature = "i686-guest"))] +use crate::mem::mgr::GuestPageTableBuffer; +use crate::mem::mgr::SnapshotSharedMemory; use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; use crate::sandbox::SandboxConfiguration; use crate::sandbox::uninitialized::{GuestBinary, GuestEnvironment}; @@ -74,6 +80,10 @@ pub struct Snapshot { /// The memory regions that were mapped when this snapshot was /// taken (excluding initial sandbox regions) regions: Vec, + /// Separate PT storage for i686 snapshots where PTs are stored + /// outside the main snapshot memory to avoid overlap with map_file_cow. + #[cfg(feature = "i686-guest")] + separate_pt_bytes: Vec, /// Extra debug information about the binary in this snapshot, /// from when the binary was first loaded into the snapshot. 
/// @@ -189,7 +199,12 @@ pub(crate) struct SharedMemoryPageTableBuffer<'a> { scratch: &'a [u8], layout: SandboxMemoryLayout, root: u64, + /// CoW resolution map: maps snapshot GPAs to their CoW'd scratch GPAs. + /// Built by walking the kernel PD to find pages that were CoW'd during boot. + #[cfg(feature = "i686-guest")] + cow_map: Option<&'a std::collections::HashMap>, } + impl<'a> SharedMemoryPageTableBuffer<'a> { pub(crate) fn new( snap: &'a [u8], @@ -202,8 +217,16 @@ impl<'a> SharedMemoryPageTableBuffer<'a> { scratch, layout, root, + #[cfg(feature = "i686-guest")] + cow_map: None, } } + + #[cfg(feature = "i686-guest")] + fn with_cow_map(mut self, cow_map: &'a std::collections::HashMap) -> Self { + self.cow_map = Some(cow_map); + self + } } impl<'a> hyperlight_common::vmem::TableReadOps for SharedMemoryPageTableBuffer<'a> { type TableAddr = u64; @@ -211,19 +234,48 @@ impl<'a> hyperlight_common::vmem::TableReadOps for SharedMemoryPageTableBuffer<' addr + offset } unsafe fn read_entry(&self, addr: u64) -> u64 { - let memoff = access_gpa(self.snap, self.scratch, self.layout, addr); - let Some(pte_bytes) = memoff.and_then(|(mem, off)| mem.get(off..off + 8)) else { - // Attacker-controlled data pointed out-of-bounds. We'll - // default to returning 0 in this case, which, for most - // architectures (including x86-64 and arm64, the ones we - // care about presently) will be a not-present entry. - return 0; + // For i686: if the GPA was CoW'd, read from the scratch copy instead. + #[cfg(feature = "i686-guest")] + let addr = { + let page_gpa = addr & 0xFFFFF000; + if let Some(map) = self.cow_map { + if let Some(&scratch_gpa) = map.get(&page_gpa) { + scratch_gpa + (addr & 0xFFF) + } else { + addr + } + } else { + addr + } }; - // this is statically the correct size, so using unwrap() here - // doesn't make this any more panic-y. 
- #[allow(clippy::unwrap_used)] - let n: [u8; 8] = pte_bytes.try_into().unwrap(); - u64::from_ne_bytes(n) + let memoff = access_gpa(self.snap, self.scratch, self.layout, addr); + // For i686 guests, page table entries are 4 bytes; for x86_64 they + // are 8 bytes. Read the correct size based on the feature flag. + #[cfg(feature = "i686-guest")] + { + let Some(pte_bytes) = memoff.and_then(|(mem, off)| mem.get(off..off + 4)) else { + // Out-of-bounds: return 0, which is a not-present entry. + return 0; + }; + #[allow(clippy::unwrap_used)] + let n: [u8; 4] = pte_bytes.try_into().unwrap(); + u32::from_ne_bytes(n) as u64 + } + #[cfg(not(feature = "i686-guest"))] + { + let Some(pte_bytes) = memoff.and_then(|(mem, off)| mem.get(off..off + 8)) else { + // Attacker-controlled data pointed out-of-bounds. We'll + // default to returning 0 in this case, which, for most + // architectures (including x86-64 and arm64, the ones we + // care about presently) will be a not-present entry. + return 0; + }; + // this is statically the correct size, so using unwrap() here + // doesn't make this any more panic-y. + #[allow(clippy::unwrap_used)] + let n: [u8; 8] = pte_bytes.try_into().unwrap(); + u64::from_ne_bytes(n) + } } fn to_phys(addr: u64) -> u64 { addr @@ -240,34 +292,424 @@ impl<'a> core::convert::AsRef> for SharedMemoryP self } } + +/// Build a CoW resolution map by walking a kernel PD. +/// For each PTE that maps a VA in [0, MEMORY_SIZE) to a PA in scratch, +/// record: original_gpa -> scratch_gpa. 
+#[cfg(feature = "i686-guest")] +fn build_cow_map( + snap: &[u8], + scratch: &[u8], + layout: SandboxMemoryLayout, + kernel_root: u64, +) -> std::collections::HashMap { + use hyperlight_common::layout::scratch_base_gpa; + let mut cow_map = std::collections::HashMap::new(); + let scratch_base = scratch_base_gpa(layout.get_scratch_size()); + let scratch_end = scratch_base + layout.get_scratch_size() as u64; + let mem_size = layout.get_memory_size().unwrap_or(0) as u64; + + for pdi in 0..1024u64 { + let pde_addr = kernel_root + pdi * 4; + let pde = access_gpa(snap, scratch, layout, pde_addr) + .and_then(|(mem, off)| mem.get(off..off + 4)) + .map(|b| u32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .unwrap_or(0); + if (pde & 1) == 0 { + continue; + } + let pt_gpa = (pde & 0xFFFFF000) as u64; + for pti in 0..1024u64 { + let pte_addr = pt_gpa + pti * 4; + let pte = access_gpa(snap, scratch, layout, pte_addr) + .and_then(|(mem, off)| mem.get(off..off + 4)) + .map(|b| u32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .unwrap_or(0); + if (pte & 1) == 0 { + continue; + } + let frame_gpa = (pte & 0xFFFFF000) as u64; + let va = (pdi << 22) | (pti << 12); + if va < mem_size && frame_gpa >= scratch_base && frame_gpa < scratch_end { + cow_map.insert(va, frame_gpa); + } + } + } + cow_map +} + +/// Helper for building i686 2-level page tables as a flat byte buffer. +/// +/// The buffer stores one or more page directories (PDs) at the front, +/// followed by page tables (PTs) that are allocated on demand. All +/// entries use 4-byte i686 PTEs. 
+#[cfg(feature = "i686-guest")] +mod i686_pt { + use hyperlight_common::vmem::i686_guest::{PAGE_ACCESSED, PAGE_AVL_COW, PAGE_PRESENT, PAGE_RW}; + + const PTE_PRESENT: u32 = PAGE_PRESENT as u32; + const PTE_RW: u32 = PAGE_RW as u32; + const PTE_ACCESSED: u32 = PAGE_ACCESSED as u32; + pub(super) const PTE_COW: u32 = PAGE_AVL_COW as u32; + pub(super) const ADDR_MASK: u32 = 0xFFFFF000; + pub(super) const RW_FLAGS: u32 = PTE_PRESENT | PTE_RW | PTE_ACCESSED; + const PAGE_SIZE: usize = 4096; + + pub(super) struct Builder { + pub bytes: Vec, + pd_base_gpa: usize, + } + + impl Builder { + pub fn new(pd_base_gpa: usize) -> Self { + Self { + bytes: vec![0u8; PAGE_SIZE], + pd_base_gpa, + } + } + + pub fn with_pds(pd_base_gpa: usize, num_pds: usize) -> Self { + Self { + bytes: vec![0u8; num_pds * PAGE_SIZE], + pd_base_gpa, + } + } + + pub fn read_u32(&self, offset: usize) -> u32 { + let b = &self.bytes[offset..offset + 4]; + u32::from_le_bytes([b[0], b[1], b[2], b[3]]) + } + + fn write_u32(&mut self, offset: usize, val: u32) { + self.bytes[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); + } + + /// Ensures a page table exists for PDE index `pdi` within the PD + /// at byte offset `pd_offset`. Allocates a new PT page at the end + /// of the buffer if absent. Returns the byte offset of the PT. + pub fn ensure_pt(&mut self, pd_offset: usize, pdi: usize, pde_flags: u32) -> usize { + let pde_off = pd_offset + pdi * 4; + let pde = self.read_u32(pde_off); + if (pde & PTE_PRESENT) != 0 { + (pde & ADDR_MASK) as usize - self.pd_base_gpa + } else { + let pt_offset = self.bytes.len(); + self.bytes.resize(pt_offset + PAGE_SIZE, 0); + let pt_gpa = (self.pd_base_gpa + pt_offset) as u32; + self.write_u32(pde_off, pt_gpa | pde_flags); + pt_offset + } + } + + /// Maps a single 4K page within the PD at `pd_offset`. 
+ pub fn map_page(&mut self, pd_offset: usize, va: u64, pa: u64, pte_flags: u32) { + let pdi = ((va as u32 >> 22) & 0x3FF) as usize; + let pti = ((va as u32 >> 12) & 0x3FF) as usize; + let pt_offset = self.ensure_pt(pd_offset, pdi, RW_FLAGS); + let pte_off = pt_offset + pti * 4; + self.write_u32(pte_off, (pa as u32) | pte_flags); + } + + /// Maps a contiguous range of pages with uniform flags. + pub fn map_range( + &mut self, + pd_offset: usize, + va_start: u64, + pa_start: u64, + len: u64, + pte_flags: u32, + ) { + let mut va = va_start; + let mut pa = pa_start; + let end = va_start + len; + while va < end { + self.map_page(pd_offset, va, pa, pte_flags); + va += PAGE_SIZE as u64; + pa += PAGE_SIZE as u64; + } + } + + pub fn into_bytes(self) -> Vec { + self.bytes + } + } +} + +/// Build initial i686 page tables for a freshly loaded guest binary. +/// Maps snapshot regions (with CoW flags for writable pages) and the scratch region. +#[cfg(feature = "i686-guest")] +fn build_initial_i686_page_tables( + layout: &crate::mem::layout::SandboxMemoryLayout, +) -> crate::Result> { + use i686_pt::{PTE_COW, RW_FLAGS}; + + use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; + + let pd_base_gpa = layout.get_pt_base_gpa() as usize; + let mut pt = i686_pt::Builder::new(pd_base_gpa); + + let ro_flags = hyperlight_common::vmem::i686_guest::PAGE_PRESENT as u32 + | hyperlight_common::vmem::i686_guest::PAGE_ACCESSED as u32; + + // 1. Map snapshot memory regions + for rgn in layout.get_memory_regions_::(())?.iter() { + let flags = if rgn.flags.contains(MemoryRegionFlags::WRITE) { + ro_flags | PTE_COW + } else { + ro_flags + }; + pt.map_range( + 0, + rgn.guest_region.start as u64, + rgn.guest_region.start as u64, + rgn.guest_region.len() as u64, + flags, + ); + } + + // 2. 
Map scratch region (writable, not CoW) + let scratch_size = layout.get_scratch_size(); + let scratch_gpa = hyperlight_common::layout::scratch_base_gpa(scratch_size); + let scratch_gva = hyperlight_common::layout::scratch_base_gva(scratch_size); + pt.map_range(0, scratch_gva, scratch_gpa, scratch_size as u64, RW_FLAGS); + + Ok(pt.into_bytes()) +} + +/// Compact an i686 snapshot: densely pack live pages and rebuild +/// per-process page tables with updated GPAs. +/// +/// Returns `(snapshot_memory, pt_bytes)`. +#[cfg(feature = "i686-guest")] +fn compact_i686_snapshot( + snap: &[u8], + scratch: &[u8], + layout: SandboxMemoryLayout, + live_pages: Vec<(Mapping, &[u8])>, + root_pt_gpas: &[u64], + cow_map: &std::collections::HashMap, + phys_seen: &mut std::collections::HashMap, +) -> crate::Result<(Vec, Vec)> { + use hyperlight_common::vmem::i686_guest::{PAGE_PRESENT, PAGE_USER}; + use i686_pt::{ADDR_MASK, PTE_COW, RW_FLAGS}; + + let page_size: usize = 4096; + + // Phase 1: pack live pages densely into a new snapshot buffer. + let mut snapshot_memory: Vec = Vec::new(); + for (mapping, contents) in live_pages { + if matches!(mapping.kind, MappingKind::Unmapped) { + continue; + } + phys_seen.entry(mapping.phys_base).or_insert_with(|| { + let new_offset = snapshot_memory.len(); + snapshot_memory.extend(contents); + new_offset + SandboxMemoryLayout::BASE_ADDRESS + }); + } + + // Phase 2: build per-process page tables with compacted GPAs. + let pd_base_gpa = layout.get_pt_base_gpa() as usize; + let n_roots = root_pt_gpas.len().max(1); + let mut pt = i686_pt::Builder::with_pds(pd_base_gpa, n_roots); + + let scratch_size = layout.get_scratch_size(); + let scratch_gpa = hyperlight_common::layout::scratch_base_gpa(scratch_size); + + // Helper: read a u32 from guest memory, resolving CoW redirections. 
+ let read_u32 = |gpa: u64| -> u32 { + let resolved = { + let page = gpa & 0xFFFFF000; + cow_map + .get(&page) + .map_or(gpa, |&scratch| scratch + (gpa & 0xFFF)) + }; + access_gpa(snap, scratch, layout, resolved) + .and_then(|(mem, off)| mem.get(off..off + 4)) + .map(|b| u32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .unwrap_or(0) + }; + + // Rebuild a single page table with remapped frame GPAs. + let rebuild_pt = |pt: &mut i686_pt::Builder, + old_pt_gpa: u64, + extra_flags: u32, + phys_map: &std::collections::HashMap| + -> u32 { + let new_pt_offset = pt.bytes.len(); + pt.bytes.resize(new_pt_offset + page_size, 0); + let new_pt_gpa = (pd_base_gpa + new_pt_offset) as u32; + for pti in 0..1024usize { + let pte = read_u32(old_pt_gpa + pti as u64 * 4); + if (pte & PAGE_PRESENT as u32) == 0 { + continue; + } + let old_frame = (pte & ADDR_MASK) as u64; + let Some(&new_gpa) = phys_map.get(&old_frame) else { + continue; + }; + let mut flags = (pte & 0xFFF) | extra_flags; + // Mark writable or already-CoW pages as CoW (read-only + AVL bit). + if (flags & RW_FLAGS & !PTE_COW) != 0 || (flags & PTE_COW) != 0 { + flags = (flags & !(hyperlight_common::vmem::i686_guest::PAGE_RW as u32)) | PTE_COW; + } + let off = new_pt_offset + pti * 4; + pt.bytes[off..off + 4].copy_from_slice(&((new_gpa as u32) | flags).to_le_bytes()); + } + new_pt_gpa + }; + + // Resolve a VA through a PD to its physical frame. + let resolve_through_pd = |pd_gpa: u64, va: u64| -> u64 { + let pdi = (va >> 22) & 0x3FF; + let pde = read_u32(pd_gpa + pdi * 4); + if (pde & PAGE_PRESENT as u32) == 0 { + return va; + } + let pti = (va >> 12) & 0x3FF; + let pte = read_u32((pde & ADDR_MASK) as u64 + pti * 4); + if (pte & PAGE_PRESENT as u32) == 0 { + return va; + } + (pte & ADDR_MASK) as u64 + }; + + // Build kernel page tables (lower 256 PD entries) from the first root. 
+ let first_root = root_pt_gpas.first().copied().ok_or_else(|| { + crate::new_error!("compact_i686_snapshot called with no page directory roots") + })?; + let mut kernel_pdes = [0u32; 256]; + for (pdi, kernel_pde) in kernel_pdes.iter_mut().enumerate() { + let pde = read_u32(first_root + pdi as u64 * 4); + if (pde & PAGE_PRESENT as u32) == 0 { + continue; + } + let new_pt_gpa = rebuild_pt(&mut pt, (pde & ADDR_MASK) as u64, 0, phys_seen); + *kernel_pde = (pde & 0xFFF) | new_pt_gpa; + } + + // Fill in per-process PDs: kernel half (shared) + user half (per-process). + for (root_idx, &root) in root_pt_gpas.iter().enumerate() { + let pd_offset = root_idx * page_size; + // Copy kernel PDEs (lower 256 entries) into this PD. + for (pdi, &kpde) in kernel_pdes.iter().enumerate() { + if kpde != 0 { + pt.bytes[pd_offset + pdi * 4..pd_offset + pdi * 4 + 4] + .copy_from_slice(&kpde.to_le_bytes()); + } + } + // Rebuild user PDEs (upper 256 entries). + for pdi in 256..1024usize { + let pde = read_u32(root + pdi as u64 * 4); + if (pde & PAGE_PRESENT as u32) == 0 { + continue; + } + let user = PAGE_USER as u32; + let pt_gpa_raw = (pde & ADDR_MASK) as u64; + let pt_gpa = resolve_through_pd(first_root, pt_gpa_raw); + let new_pt_gpa = rebuild_pt(&mut pt, pt_gpa, user, phys_seen); + let fixed_pde = (pde & 0xFFF) | new_pt_gpa | user; + pt.bytes[pd_offset + pdi * 4..pd_offset + pdi * 4 + 4] + .copy_from_slice(&fixed_pde.to_le_bytes()); + } + } + + // Map scratch and snapshot identity regions into every PD. 
+ for ri in 0..n_roots { + let pd_off = ri * page_size; + pt.map_range( + pd_off, + scratch_gpa, + scratch_gpa, + scratch_size as u64, + RW_FLAGS, + ); + + let snapshot_end = SandboxMemoryLayout::BASE_ADDRESS + snapshot_memory.len(); + let snapshot_pages = (snapshot_end - SandboxMemoryLayout::BASE_ADDRESS) / page_size; + for pi in 0..snapshot_pages { + let gpa = (SandboxMemoryLayout::BASE_ADDRESS + pi * page_size) as u64; + let pdi = ((gpa >> 22) & 0x3FF) as usize; + let pti = ((gpa >> 12) & 0x3FF) as usize; + let pt_off = pt.ensure_pt(pd_off, pdi, RW_FLAGS); + let pte_off = pt_off + pti * 4; + if pt.read_u32(pte_off) & PAGE_PRESENT as u32 == 0 { + pt.bytes[pte_off..pte_off + 4] + .copy_from_slice(&((gpa as u32) | RW_FLAGS).to_le_bytes()); + } + } + } + + Ok((snapshot_memory, pt.into_bytes())) +} + fn filtered_mappings<'a>( snap: &'a [u8], scratch: &'a [u8], regions: &[MemoryRegion], layout: SandboxMemoryLayout, - root_pt: u64, + root_pts: &[u64], + #[cfg(feature = "i686-guest")] cow_map: &std::collections::HashMap, ) -> Vec<(Mapping, &'a [u8])> { - let op = SharedMemoryPageTableBuffer::new(snap, scratch, layout, root_pt); - unsafe { - hyperlight_common::vmem::virt_to_phys(&op, 0, hyperlight_common::layout::MAX_GVA as u64) - } - .filter_map(move |mapping| { - // the scratch map doesn't count - if mapping.virt_base >= scratch_base_gva(layout.get_scratch_size()) { - return None; + #[cfg(not(feature = "i686-guest"))] + let mappings_iter: Vec = { + let Some(&root_pt) = root_pts.first() else { + return Vec::new(); + }; + let op = SharedMemoryPageTableBuffer::new(snap, scratch, layout, root_pt); + unsafe { + hyperlight_common::vmem::virt_to_phys(&op, 0, hyperlight_common::layout::MAX_GVA as u64) } - // neither does the mapping of the snapshot's own page tables - #[cfg(not(feature = "i686-guest"))] - if mapping.virt_base >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 - && mapping.virt_base <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 - { - return 
None; + .collect() + }; + + #[cfg(feature = "i686-guest")] + let mappings_iter: Vec = { + use std::collections::HashSet; + let mut mappings = Vec::new(); + let mut seen_phys = HashSet::new(); + + let scratch_base_gva_val = + hyperlight_common::layout::scratch_base_gva(layout.get_scratch_size()); + for &root_pt in root_pts { + let op = SharedMemoryPageTableBuffer::new(snap, scratch, layout, root_pt) + .with_cow_map(cow_map); + let root_mappings = + unsafe { hyperlight_common::vmem::i686_guest::virt_to_phys_all(&op) }; + for m in root_mappings { + // Skip mappings whose VA is in the scratch region - these + // are identity-mapped helpers and would poison seen_phys for + // legitimate user mappings that share the same scratch PAs. + if m.virt_base >= scratch_base_gva_val { + continue; + } + if seen_phys.insert(m.phys_base) { + mappings.push(m); + } + } } - // todo: is it useful to warn if we can't resolve this? - let contents = unsafe { guest_page(snap, scratch, regions, layout, mapping.phys_base) }?; - Some((mapping, contents)) - }) - .collect() + mappings + }; + + mappings_iter + .into_iter() + .filter_map(move |mapping| { + // the scratch map doesn't count + if mapping.virt_base >= scratch_base_gva(layout.get_scratch_size()) { + return None; + } + // neither does the mapping of the snapshot's own page tables + #[cfg(not(feature = "i686-guest"))] + if mapping.virt_base >= hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN as u64 + && mapping.virt_base <= hyperlight_common::layout::SNAPSHOT_PT_GVA_MAX as u64 + { + return None; + } + let contents = + unsafe { guest_page(snap, scratch, regions, layout, mapping.phys_base) }?; + Some((mapping, contents)) + }) + .collect() } /// Find the contents of the page which starts at gpa in guest physical @@ -293,6 +735,7 @@ unsafe fn guest_page<'a>( Some(&resolved.as_ref()[..PAGE_SIZE]) } +#[cfg(not(feature = "i686-guest"))] fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) { // Map the scratch region let mapping = 
Mapping { @@ -406,6 +849,12 @@ impl Snapshot { layout.set_pt_size(pt_bytes.len())?; memory.extend(&pt_bytes); }; + #[cfg(feature = "i686-guest")] + { + let pt_bytes = build_initial_i686_page_tables(&layout)?; + layout.set_pt_size(pt_bytes.len())?; + memory.extend(&pt_bytes); + }; let exn_stack_top_gva = hyperlight_common::layout::MAX_GVA as u64 - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET @@ -423,6 +872,8 @@ impl Snapshot { hash, stack_top_gva: exn_stack_top_gva, sregs: None, + #[cfg(feature = "i686-guest")] + separate_pt_bytes: Vec::new(), entrypoint: NextAction::Initialise(load_addr + entrypoint_va - base_va), }) } @@ -443,7 +894,7 @@ impl Snapshot { mut layout: SandboxMemoryLayout, load_info: LoadInfo, regions: Vec, - root_pt_gpa: u64, + root_pt_gpas: &[u64], stack_top_gva: u64, sregs: CommonSpecialRegisters, entrypoint: NextAction, @@ -452,54 +903,96 @@ impl Snapshot { let mut phys_seen = HashMap::::new(); let memory = shared_mem.with_contents(|snap_c| { scratch_mem.with_contents(|scratch_c| { - // Pass 1: count how many pages need to live - let live_pages = - filtered_mappings(snap_c, scratch_c, ®ions, layout, root_pt_gpa); + // Build CoW resolution map (i686 only): maps original GPAs + // to their CoW'd scratch GPAs so the PT walker can read the + // actual page table data instead of stale snapshot copies. 
+ #[cfg(feature = "i686-guest")] + let cow_map = { + let kernel_root = root_pt_gpas.first().copied().ok_or_else(|| { + crate::new_error!("snapshot requires at least one page directory root") + })?; + build_cow_map(snap_c, scratch_c, layout, kernel_root) + }; + + // Pass 1: collect live pages + let live_pages = filtered_mappings( + snap_c, + scratch_c, + ®ions, + layout, + root_pt_gpas, + #[cfg(feature = "i686-guest")] + &cow_map, + ); - // Pass 2: copy them, and map them + // Pass 2: copy live pages and build new page tables // TODO: Look for opportunities to hugepage map - let pt_buf = GuestPageTableBuffer::new(layout.get_pt_base_gpa() as usize); - let mut snapshot_memory: Vec = Vec::new(); - for (mapping, contents) in live_pages { - let kind = match mapping.kind { - MappingKind::Cow(cm) => MappingKind::Cow(cm), - MappingKind::Basic(bm) if bm.writable => MappingKind::Cow(CowMapping { - readable: bm.readable, - executable: bm.executable, - }), - MappingKind::Basic(bm) => MappingKind::Basic(BasicMapping { - readable: bm.readable, - writable: false, - executable: bm.executable, - }), - MappingKind::Unmapped => continue, - }; - let new_gpa = phys_seen.entry(mapping.phys_base).or_insert_with(|| { - let new_offset = snapshot_memory.len(); - snapshot_memory.extend(contents); - new_offset + SandboxMemoryLayout::BASE_ADDRESS - }); - let mapping = Mapping { - phys_base: *new_gpa as u64, - virt_base: mapping.virt_base, - len: PAGE_SIZE as u64, - kind, - }; - unsafe { vmem::map(&pt_buf, mapping) }; - } - // Phase 3: Map the special mappings - map_specials(&pt_buf, layout.get_scratch_size()); - let pt_bytes = pt_buf.into_bytes(); - layout.set_pt_size(pt_bytes.len())?; - snapshot_memory.extend(&pt_bytes); - Ok::, crate::HyperlightError>(snapshot_memory) + #[cfg(not(feature = "i686-guest"))] + let (snapshot_memory, pt_bytes) = { + let mut snapshot_memory: Vec = Vec::new(); + let pt_buf = GuestPageTableBuffer::new(layout.get_pt_base_gpa() as usize); + for (mapping, contents) in 
live_pages { + let kind = match mapping.kind { + MappingKind::Cow(cm) => MappingKind::Cow(cm), + MappingKind::Basic(bm) if bm.writable => MappingKind::Cow(CowMapping { + readable: bm.readable, + executable: bm.executable, + }), + MappingKind::Basic(bm) => MappingKind::Basic(BasicMapping { + readable: bm.readable, + writable: false, + executable: bm.executable, + }), + MappingKind::Unmapped => continue, + }; + let new_gpa = phys_seen.entry(mapping.phys_base).or_insert_with(|| { + let new_offset = snapshot_memory.len(); + snapshot_memory.extend(contents); + new_offset + SandboxMemoryLayout::BASE_ADDRESS + }); + let mapping = Mapping { + phys_base: *new_gpa as u64, + virt_base: mapping.virt_base, + len: PAGE_SIZE as u64, + kind, + }; + unsafe { vmem::map(&pt_buf, mapping) }; + } + map_specials(&pt_buf, layout.get_scratch_size()); + let pt_data = pt_buf.into_bytes(); + layout.set_pt_size(pt_data.len())?; + snapshot_memory.extend(&pt_data); + (snapshot_memory, Vec::new()) + }; + + #[cfg(feature = "i686-guest")] + let (snapshot_memory, pt_bytes) = { + let (mem, pt) = compact_i686_snapshot( + snap_c, + scratch_c, + layout, + live_pages, + root_pt_gpas, + &cow_map, + &mut phys_seen, + )?; + layout.set_pt_size(pt.len())?; + (mem, pt) + }; + + Ok::<(Vec, Vec), crate::HyperlightError>((snapshot_memory, pt_bytes)) }) })???; + let (memory, separate_pt_bytes) = memory; layout.set_snapshot_size(memory.len()); - // We do not need the original regions anymore, as any uses of - // them in the guest have been incorporated into the snapshot - // properly. + // For i686, keep the regions so the RAMFS and other map_file_cow + // mappings are accessible after restore. For x86_64, we do not + // need the original regions anymore, as any uses of them in the + // guest have been incorporated into the snapshot properly. 
+ #[cfg(feature = "i686-guest")] + let regions = regions; + #[cfg(not(feature = "i686-guest"))] let regions = Vec::new(); let hash = hash(&memory, ®ions)?; @@ -512,6 +1005,8 @@ impl Snapshot { hash, stack_top_gva, sregs: Some(sregs), + #[cfg(feature = "i686-guest")] + separate_pt_bytes, entrypoint, }) } @@ -558,6 +1053,11 @@ impl Snapshot { self.sregs.as_ref() } + #[cfg(feature = "i686-guest")] + pub(crate) fn separate_pt_bytes(&self) -> &[u8] { + &self.separate_pt_bytes + } + pub(crate) fn entrypoint(&self) -> NextAction { self.entrypoint } @@ -570,6 +1070,7 @@ impl PartialEq for Snapshot { } #[cfg(test)] +#[cfg(not(feature = "i686-guest"))] mod tests { use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind, PAGE_SIZE}; @@ -638,7 +1139,7 @@ mod tests { mgr.layout, LoadInfo::dummy(), Vec::new(), - pt_base, + &[pt_base], 0, default_sregs(), super::NextAction::None, @@ -654,7 +1155,7 @@ mod tests { mgr.layout, LoadInfo::dummy(), Vec::new(), - pt_base, + &[pt_base], 0, default_sregs(), super::NextAction::None,