From ca827bee15ce42ac0232ce12fbf13fb92d8788b6 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:56:34 -0700 Subject: [PATCH 1/5] Enable cross-sandbox snapshot restore Removes Snapshot.sandbox_id and MultiUseSandbox.id. A snapshot can now be restored into any layout-compatible MultiUseSandbox whose registered host functions are a superset of those required by the snapshot. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/error.rs | 25 +- src/hyperlight_host/src/mem/layout.rs | 86 ++++ src/hyperlight_host/src/mem/mgr.rs | 2 - .../src/sandbox/initialized_multi_use.rs | 393 ++++++++++++++++-- .../src/sandbox/snapshot/mod.rs | 53 +-- 5 files changed, 482 insertions(+), 77 deletions(-) diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs index 3a1d0955c..c6738374d 100644 --- a/src/hyperlight_host/src/error.rs +++ b/src/hyperlight_host/src/error.rs @@ -240,9 +240,25 @@ pub enum HyperlightError { #[error("Failed To Convert Return Value {0:?} to {1:?}")] ReturnValueConversionFailure(ReturnValue, &'static str), - /// Tried to restore snapshot to a sandbox that is not the same as the one the snapshot was taken from - #[error("Snapshot was taken from a different sandbox")] - SnapshotSandboxMismatch, + /// Tried to restore a snapshot into a sandbox whose memory + /// layout is not compatible with the snapshot's. + #[error("Snapshot memory layout is not compatible with this sandbox")] + SnapshotLayoutMismatch, + + /// Tried to restore a snapshot into a sandbox whose registered + /// host functions do not satisfy the snapshot's required set. + #[error( + "Snapshot host function mismatch: missing=[{}], signature mismatches=[{}]", + missing.join(", "), + signature_mismatches.join("; ") + )] + SnapshotHostFunctionMismatch { + /// Functions that are required by the snapshot but not present in the target sandbox. 
+ missing: Vec<String>, + /// Human-readable descriptions of functions whose signatures + /// disagree between the snapshot and the target sandbox. + signature_mismatches: Vec<String>, + }, /// SystemTimeError #[error("SystemTimeError {0:?}")] @@ -385,7 +401,8 @@ impl HyperlightError { | HyperlightError::RefCellBorrowFailed(_) | HyperlightError::RefCellMutBorrowFailed(_) | HyperlightError::ReturnValueConversionFailure(_, _) - | HyperlightError::SnapshotSandboxMismatch + | HyperlightError::SnapshotLayoutMismatch + | HyperlightError::SnapshotHostFunctionMismatch { .. } | HyperlightError::SystemTimeError(_) | HyperlightError::TryFromSliceError(_) | HyperlightError::UnexpectedNoOfArguments(_, _) diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index d8b81a59e..fa1671e4a 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -296,6 +296,40 @@ impl Debug for SandboxMemoryLayout { } impl SandboxMemoryLayout { + /// Whether `other` has the same layout configuration as `self`, + /// i.e. the fields that come from the guest binary and the + /// `SandboxConfiguration`. `snapshot_size` and `pt_size` are + /// excluded because they are outputs of building a snapshot blob + /// (the compacted data size and the size of the rebuilt + /// page-table tail), not configuration inputs, so they differ + /// between the sandbox's live layout and any snapshot taken + /// from it. + /// + /// TODO: separate/remove snapshot_size and pt_size from this struct. + pub(crate) fn is_compatible_with(&self, other: &Self) -> bool { + // Exhaustive destructure so adding a field to + // `SandboxMemoryLayout` fails to compile here, forcing the + // author to decide whether it participates in compatibility. 
+ let Self { + input_data_size, + output_data_size, + heap_size, + code_size, + init_data_size, + init_data_permissions, + scratch_size, + snapshot_size: _, + pt_size: _, + } = self; + *input_data_size == other.input_data_size + && *output_data_size == other.output_data_size + && *heap_size == other.heap_size + && *code_size == other.code_size + && *init_data_size == other.init_data_size + && *init_data_permissions == other.init_data_permissions + && *scratch_size == other.scratch_size + } + /// The maximum amount of memory a single sandbox will be allowed. /// /// Both the scratch region and the snapshot region are bounded by @@ -782,4 +816,56 @@ mod tests { let layout = SandboxMemoryLayout::new(cfg, 4096, 4096, None); assert!(matches!(layout.unwrap_err(), MemoryRequestTooBig(..))); } + + #[test] + fn is_compatible_with_identical_layouts() { + let cfg = SandboxConfiguration::default(); + let a = SandboxMemoryLayout::new(cfg, 4096, 0, None).unwrap(); + let b = SandboxMemoryLayout::new(cfg, 4096, 0, None).unwrap(); + assert!(a.is_compatible_with(&b)); + assert!(b.is_compatible_with(&a)); + } + + #[test] + fn is_compatible_with_ignores_snapshot_size_and_pt_size() { + // `snapshot_size` and `pt_size` are outputs of building a + // snapshot blob, not configuration inputs, so flipping + // them must not break compatibility. + let cfg = SandboxConfiguration::default(); + let a = SandboxMemoryLayout::new(cfg, 4096, 0, None).unwrap(); + let mut b = a; + b.snapshot_size = a.snapshot_size + PAGE_SIZE_USIZE; + b.set_pt_size(PAGE_SIZE_USIZE).unwrap(); + assert!(a.is_compatible_with(&b)); + assert!(b.is_compatible_with(&a)); + } + + #[test] + fn is_compatible_with_rejects_each_configured_field() { + let cfg = SandboxConfiguration::default(); + let base = SandboxMemoryLayout::new(cfg, 4096, 0, None).unwrap(); + + // Each mutation must independently break compatibility. 
+ let mutators: &[fn(&mut SandboxMemoryLayout)] = &[ + |l| l.input_data_size += PAGE_SIZE_USIZE, + |l| l.output_data_size += PAGE_SIZE_USIZE, + |l| l.heap_size += PAGE_SIZE_USIZE, + |l| l.code_size += PAGE_SIZE_USIZE, + |l| l.init_data_size += PAGE_SIZE_USIZE, + |l| l.scratch_size += PAGE_SIZE_USIZE, + |l| { + l.init_data_permissions = Some(MemoryRegionFlags::READ); + }, + ]; + for mutate in mutators { + let mut other = base; + mutate(&mut other); + assert!( + !base.is_compatible_with(&other), + "mutation should have broken compatibility: {:?} vs {:?}", + base, + other, + ); + } + } } diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 3a59918b7..dfeebcc7a 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -302,7 +302,6 @@ where #[allow(clippy::too_many_arguments)] pub(crate) fn snapshot( &mut self, - sandbox_id: u64, mapped_regions: Vec, root_pt_gpas: &[u64], rsp_gva: u64, @@ -314,7 +313,6 @@ where Snapshot::new( &mut self.shared_mem, &mut self.scratch_mem, - sandbox_id, self.layout, crate::mem::exe::LoadInfo::dummy(), mapped_regions, diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index a8761eed2..f411bf0e9 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -16,7 +16,6 @@ limitations under the License. 
use std::collections::HashSet; use std::path::Path; -use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; use flatbuffers::FlatBufferBuilder; @@ -31,7 +30,6 @@ use super::Callable; use super::file_mapping::prepare_file_cow; use super::host_funcs::FunctionRegistry; use super::snapshot::Snapshot; -use crate::HyperlightError::{self, SnapshotSandboxMismatch}; use crate::func::{ParameterTuple, SupportedReturnType}; use crate::hypervisor::InterruptHandle; use crate::hypervisor::hyperlight_vm::{HyperlightVm, HyperlightVmError}; @@ -41,7 +39,7 @@ use crate::mem::shared_mem::{HostSharedMemory, SharedMemory as _}; use crate::metrics::{ METRIC_GUEST_ERROR, METRIC_GUEST_ERROR_LABEL_CODE, maybe_time_and_emit_guest_call, }; -use crate::{Result, log_then_return}; +use crate::{HyperlightError, Result, log_then_return}; /// A fully initialized sandbox that can execute guest functions multiple times. /// @@ -82,8 +80,6 @@ use crate::{Result, log_then_return}; /// This is the **only safe way** to recover - it completely replaces all memory state, /// eliminating any inconsistencies. See [`restore()`](Self::restore) for details. pub struct MultiUseSandbox { - /// Unique identifier for this sandbox instance - id: u64, /// Whether this sandbox is poisoned poisoned: bool, pub(crate) host_funcs: Arc>, @@ -126,7 +122,6 @@ impl MultiUseSandbox { #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> MultiUseSandbox { Self { - id: super::snapshot::SANDBOX_CONFIGURATION_COUNTER.fetch_add(1, Ordering::Relaxed), poisoned: false, host_funcs, mem_mgr: mgr, @@ -156,7 +151,9 @@ impl MultiUseSandbox { /// The provided [`HostFunctions`] must include every host function /// that was registered on the sandbox at the time the snapshot was /// taken (matched by name and signature). Additional host functions - /// not present in the snapshot are allowed. + /// not present in the snapshot are allowed. 
A mismatch returns + /// [`SnapshotHostFunctionMismatch`](crate::HyperlightError::SnapshotHostFunctionMismatch) + /// carrying the missing names and signature differences. /// /// An optional [`SandboxConfiguration`](crate::sandbox::SandboxConfiguration) /// can be supplied to override runtime settings such as timeouts and @@ -201,7 +198,7 @@ impl MultiUseSandbox { // Validate that the provided host functions are a superset of // those required by the snapshot. - snapshot.validate_host_functions(&host_funcs)?; + snapshot.validate_host_functions(host_funcs.inner())?; let host_funcs = Arc::new(Mutex::new(host_funcs.into_inner())); @@ -300,29 +297,25 @@ impl MultiUseSandbox { #[cfg(gdb)] let dbg_mem_wrapper = Arc::new(Mutex::new(hshm.clone())); - let mut sbox = MultiUseSandbox::from_uninit( + let sbox = MultiUseSandbox::from_uninit( host_funcs, hshm, vm, #[cfg(gdb)] dbg_mem_wrapper, ); - // Use the snapshot's sandbox_id so that restore() back to this - // snapshot is permitted. The id is process-local and never - // persisted to disk: `Snapshot::from_oci` assigns a fresh id - // on every load, so two `from_file` calls of the same path - // yield restore-incompatible sandboxes (which is the intended - // safer default). Sandboxes built from clones of the same - // in-memory `Arc` share the id and are mutually - // restore-compatible. - sbox.id = snapshot.sandbox_id(); Ok(sbox) } /// Creates a snapshot of the sandbox's current memory state. /// - /// The snapshot is tied to this specific sandbox instance and can only be - /// restored to the same sandbox it was created from. + /// The returned snapshot can be applied to any + /// [`MultiUseSandbox`] whose memory layout is structurally + /// compatible with this sandbox's layout and whose registered + /// host functions are a superset of those registered here at the + /// time of capture. 
See [`MultiUseSandbox::restore`] and + /// [`MultiUseSandbox::from_snapshot`] for the exact compatibility + /// rules and the error variants returned on mismatch. /// /// ## Poisoned Sandbox /// @@ -342,7 +335,7 @@ impl MultiUseSandbox { /// // Modify sandbox state /// sandbox.call_guest_function_by_name::("SetValue", 42)?; /// - /// // Create snapshot belonging to this sandbox + /// // Capture a snapshot of the current memory state /// let snapshot = sandbox.snapshot()?; /// # Ok(()) /// # } @@ -387,7 +380,6 @@ impl MultiUseSandbox { .into(); let memory_snapshot = self.mem_mgr.snapshot( - self.id, mapped_regions_vec, &root_pt_gpas, stack_top_gpa, @@ -402,11 +394,16 @@ impl MultiUseSandbox { /// Restores the sandbox's memory to a previously captured snapshot state. /// - /// The snapshot must have been created from this same sandbox instance. - /// Attempting to restore a snapshot from a different sandbox will return - /// a [`SnapshotSandboxMismatch`](crate::HyperlightError::SnapshotSandboxMismatch) error. + /// The snapshot's memory layout must be structurally compatible + /// with this sandbox's layout, otherwise this returns + /// [`SnapshotLayoutMismatch`](crate::HyperlightError::SnapshotLayoutMismatch). /// - /// Registered host functions are not modified by `restore`. + /// The sandbox's registered host functions must be a superset of + /// those required by the snapshot (matched by name and + /// signature). Extras on the sandbox are allowed. The registry + /// itself is left unchanged. A mismatch returns + /// [`SnapshotHostFunctionMismatch`](crate::HyperlightError::SnapshotHostFunctionMismatch) + /// carrying the missing names and signature differences. /// /// ## Poison State Recovery /// @@ -502,8 +499,12 @@ impl MultiUseSandbox { // However, out of an abundance of caution, the optimisation // is presently disabled. 
- if self.id != snapshot.sandbox_id() { - return Err(SnapshotSandboxMismatch); + { + let host_funcs = self + .host_funcs + .try_lock() + .map_err(|e| crate::new_error!("Error locking host_funcs: {}", e))?; + snapshot.validate_compatibility(&self.mem_mgr.layout, &host_funcs)?; } let (gsnapshot, gscratch) = self.mem_mgr.restore_snapshot(&snapshot)?; @@ -1652,8 +1653,63 @@ mod tests { assert_eq!(new_read, orig_read); } + /// Compaction copies mapped-region pages into the snapshot blob, + /// so cross-instance restore preserves their contents without the + /// target ever mapping the region. #[test] - fn snapshot_different_sandbox() { + fn snapshot_restore_across_sandboxes_preserves_mapped_region_contents() { + let mut source: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + + let map_mem = allocate_guest_memory(); + let guest_base = 0x200000000_usize; + let region = region_for_memory(&map_mem, guest_base, MemoryRegionFlags::READ); + unsafe { source.map_region(&region).unwrap() }; + + // do_map=true installs the guest PTE for the region. + let orig_read = source + .call::>( + "ReadMappedBuffer", + ( + guest_base as u64, + hyperlight_common::vmem::PAGE_SIZE as u64, + true, + ), + ) + .unwrap(); + + let snapshot = source.snapshot().unwrap(); + + let mut target: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + assert_eq!(target.vm.get_mapped_regions().count(), 0); + + target.restore(snapshot).unwrap(); + assert_eq!(target.vm.get_mapped_regions().count(), 0); + + // Snapshot PTEs resolve to GPAs in the snapshot blob, so the + // data is readable without re-mapping. 
+ let new_read = target + .call::>( + "ReadMappedBuffer", + ( + guest_base as u64, + hyperlight_common::vmem::PAGE_SIZE as u64, + false, + ), + ) + .unwrap(); + assert_eq!(new_read, orig_read); + } + + #[test] + fn snapshot_restore_across_sandboxes() { let mut sandbox = { let path = simple_guest_as_string().unwrap(); let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); @@ -1665,23 +1721,183 @@ mod tests { let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); u_sbox.evolve().unwrap() }; - assert_ne!(sandbox.id, sandbox2.id); + + sandbox.call::("AddToStatic", 42i32).unwrap(); + assert_eq!(sandbox2.call::("GetStatic", ()).unwrap(), 0); let snapshot = sandbox.snapshot().unwrap(); - let err = sandbox2.restore(snapshot.clone()); - assert!(matches!(err, Err(HyperlightError::SnapshotSandboxMismatch))); + sandbox2.restore(snapshot).unwrap(); + assert_eq!(sandbox2.call::("GetStatic", ()).unwrap(), 42); + } - let sandbox_id = sandbox.id; - drop(sandbox); - drop(sandbox2); - drop(snapshot); + #[test] + fn snapshot_restore_rejects_incompatible_layout() { + let mut sandbox = { + let path = simple_guest_as_string().unwrap(); + let mut cfg = SandboxConfiguration::default(); + cfg.set_heap_size(0x10_000); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), Some(cfg)).unwrap(); + u_sbox.evolve().unwrap() + }; + + let mut sandbox2 = { + let path = simple_guest_as_string().unwrap(); + let mut cfg = SandboxConfiguration::default(); + cfg.set_heap_size(0x20_000); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), Some(cfg)).unwrap(); + u_sbox.evolve().unwrap() + }; + + let snapshot = sandbox.snapshot().unwrap(); + let err = sandbox2.restore(snapshot); + assert!(matches!(err, Err(HyperlightError::SnapshotLayoutMismatch))); + } + + /// Validation runs before any memory or vCPU mutation, so a + /// rejected `restore` leaves the target usable. 
+ #[test] + fn snapshot_restore_failure_leaves_target_usable() { + let path = simple_guest_as_string().unwrap(); + let mut cfg_a = SandboxConfiguration::default(); + cfg_a.set_heap_size(0x10_000); + let mut source = UninitializedSandbox::new(GuestBinary::FilePath(path), Some(cfg_a)) + .unwrap() + .evolve() + .unwrap(); + + let path = simple_guest_as_string().unwrap(); + let mut cfg_b = SandboxConfiguration::default(); + cfg_b.set_heap_size(0x20_000); + let mut target = UninitializedSandbox::new(GuestBinary::FilePath(path), Some(cfg_b)) + .unwrap() + .evolve() + .unwrap(); - let sandbox3 = { + target.call::("AddToStatic", 5i32).unwrap(); + let bad_snapshot = source.snapshot().unwrap(); + let err = target.restore(bad_snapshot); + assert!(matches!(err, Err(HyperlightError::SnapshotLayoutMismatch))); + + assert_eq!(target.call::("GetStatic", ()).unwrap(), 5); + target.call::("AddToStatic", 3i32).unwrap(); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 8); + + let good_snapshot = target.snapshot().unwrap(); + target.call::("AddToStatic", 100i32).unwrap(); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 108); + target.restore(good_snapshot).unwrap(); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 8); + } + + /// `snapshot.regions()` is empty post-compaction, so restore + /// unmaps anything the target had mapped. 
+ #[test] + fn snapshot_restore_across_sandboxes_target_has_mapped_regions() { + let mut source: MultiUseSandbox = { let path = simple_guest_as_string().unwrap(); let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); u_sbox.evolve().unwrap() }; - assert_ne!(sandbox3.id, sandbox_id); + source.call::("AddToStatic", 23i32).unwrap(); + let snapshot = source.snapshot().unwrap(); + + let mut target: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + let map_mem = allocate_guest_memory(); + let guest_base = 0x200000000_usize; + let region = region_for_memory(&map_mem, guest_base, MemoryRegionFlags::READ); + unsafe { target.map_region(&region).unwrap() }; + assert_eq!(target.vm.get_mapped_regions().count(), 1); + + target.restore(snapshot).unwrap(); + assert_eq!(target.vm.get_mapped_regions().count(), 0); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 23); + } + + /// Compacted snapshot data is reachable at the source's GVA even + /// when the target had a different region mapped at a different + /// GVA. 
+ #[test] + fn snapshot_restore_across_sandboxes_both_have_different_mapped_regions() { + let mut source: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + let source_mem = allocate_guest_memory(); + let source_base = 0x200000000_usize; + let source_region = region_for_memory(&source_mem, source_base, MemoryRegionFlags::READ); + unsafe { source.map_region(&source_region).unwrap() }; + let orig_read = source + .call::>( + "ReadMappedBuffer", + ( + source_base as u64, + hyperlight_common::vmem::PAGE_SIZE as u64, + true, + ), + ) + .unwrap(); + source.call::("AddToStatic", 9i32).unwrap(); + let snapshot = source.snapshot().unwrap(); + + let mut target: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + let target_mem = allocate_guest_memory(); + let target_base = 0x300000000_usize; + let target_region = region_for_memory(&target_mem, target_base, MemoryRegionFlags::READ); + unsafe { target.map_region(&target_region).unwrap() }; + assert_eq!(target.vm.get_mapped_regions().count(), 1); + + target.restore(snapshot).unwrap(); + + assert_eq!(target.vm.get_mapped_regions().count(), 0); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 9); + + let new_read = target + .call::>( + "ReadMappedBuffer", + ( + source_base as u64, + hyperlight_common::vmem::PAGE_SIZE as u64, + false, + ), + ) + .unwrap(); + assert_eq!(new_read, orig_read); + } + + /// Repeated restore of the same snapshot is idempotent. 
+ #[test] + fn snapshot_restore_across_sandboxes_repeated() { + let mut source: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + source.call::("AddToStatic", 7i32).unwrap(); + let snapshot = source.snapshot().unwrap(); + + let mut target: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + + target.restore(snapshot.clone()).unwrap(); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 7); + + target.call::("AddToStatic", 1000i32).unwrap(); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 1007); + + target.restore(snapshot).unwrap(); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 7); } /// Test that snapshot restore properly resets vCPU debug registers. This test verifies @@ -2926,7 +3142,9 @@ mod tests { use crate::func::Registerable; use crate::sandbox::SandboxConfiguration; use crate::sandbox::snapshot::Snapshot; - use crate::{GuestBinary, HostFunctions, MultiUseSandbox, UninitializedSandbox}; + use crate::{ + GuestBinary, HostFunctions, HyperlightError, MultiUseSandbox, UninitializedSandbox, + }; fn make_sandbox() -> MultiUseSandbox { let path = simple_guest_as_string().unwrap(); @@ -2976,9 +3194,9 @@ mod tests { assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); } - /// Sandboxes built from clones of one `Arc` share - /// `sandbox_id` (so both can `restore` to it) but are - /// memory-isolated from each other. + /// Two sandboxes built from clones of one `Arc` can + /// each `restore` back to it, and stay memory-isolated from + /// each other in between. 
#[test] fn arc_clone_isolation_and_restore_compat() { let mut sbox = make_sandbox(); @@ -3020,8 +3238,84 @@ mod tests { let snap = sbox.snapshot().unwrap(); let err = MultiUseSandbox::from_snapshot(snap, HostFunctions::default(), None) .expect_err("missing `Add` must be rejected"); - let msg = format!("{}", err); - assert!(msg.contains("Add"), "got: {}", msg); + assert!( + matches!( + &err, + HyperlightError::SnapshotHostFunctionMismatch { missing, signature_mismatches } + if missing.iter().any(|n| n == "Add") && signature_mismatches.is_empty() + ), + "got: {:?}", + err + ); + } + + /// `restore` must also reject a snapshot whose required host + /// functions are not a subset of the target sandbox's. This + /// matters across sandboxes: a snapshot taken from a sandbox + /// with `Add` registered cannot be restored into a layout + /// compatible sandbox that lacks `Add`. + #[test] + fn restore_rejects_missing_host_function() { + let mut sbox_with_add = make_sandbox_with_add(); + let snap = sbox_with_add.snapshot().unwrap(); + let mut sbox_without_add = make_sandbox(); + let err = sbox_without_add + .restore(snap) + .expect_err("missing `Add` must be rejected on restore"); + assert!( + matches!( + &err, + HyperlightError::SnapshotHostFunctionMismatch { missing, .. } + if missing.iter().any(|n| n == "Add") + ), + "got: {:?}", + err + ); + } + + /// `restore` rejects a snapshot whose required host function + /// shares a name with the target's but disagrees on signature. 
+ #[test] + fn restore_rejects_signature_mismatch() { + let mut sbox_with_add = make_sandbox_with_add(); + let snap = sbox_with_add.snapshot().unwrap(); + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Add", |a: String, b: String| Ok(format!("{a}{b}"))) + .unwrap(); + let mut sbox_wrong_add = u.evolve().unwrap(); + let err = sbox_wrong_add + .restore(snap) + .expect_err("signature mismatch on `Add` must be rejected on restore"); + assert!( + matches!( + &err, + HyperlightError::SnapshotHostFunctionMismatch { missing, signature_mismatches } + if missing.is_empty() && signature_mismatches.iter().any(|s| s.contains("Add")) + ), + "got: {:?}", + err + ); + } + + /// Cross-instance `restore` succeeds when the target registers + /// a strict superset of the snapshot's host functions. + #[test] + fn restore_across_sandboxes_with_superset_host_funcs() { + let mut source = make_sandbox_with_add(); + source.call::("AddToStatic", 17i32).unwrap(); + let snap = source.snapshot().unwrap(); + + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + u.register_host_function("Mul", |a: i32, b: i32| Ok(a * b)) + .unwrap(); + let mut target = u.evolve().unwrap(); + + target.restore(snap).unwrap(); + assert_eq!(target.call::("GetStatic", ()).unwrap(), 17); } #[test] @@ -3033,8 +3327,15 @@ mod tests { .unwrap(); let err = MultiUseSandbox::from_snapshot(snap, hf, None) .expect_err("signature mismatch on `Add` must be rejected"); - let msg = format!("{}", err); - assert!(msg.contains("Add"), "got: {}", msg); + assert!( + matches!( + &err, + HyperlightError::SnapshotHostFunctionMismatch { missing, signature_mismatches } + if missing.is_empty() && signature_mismatches.iter().any(|s| s.contains("Add")) + ), + "got: {:?}", + err + 
); } /// Supplied host-function set may be a strict superset of the diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index 6d4eae452..91fad0d4c 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -15,7 +15,6 @@ limitations under the License. */ use std::collections::{BTreeMap, HashMap}; -use std::sync::atomic::{AtomicU64, Ordering}; use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; use hyperlight_common::layout::{scratch_base_gpa, scratch_base_gva}; @@ -35,8 +34,6 @@ use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; use crate::sandbox::SandboxConfiguration; use crate::sandbox::uninitialized::{GuestBinary, GuestEnvironment}; -pub(super) static SANDBOX_CONFIGURATION_COUNTER: AtomicU64 = AtomicU64::new(0); - const PTE_SIZE: usize = size_of::(); /// Presently, a snapshot can be of a preinitialised sandbox, which @@ -65,15 +62,9 @@ pub enum NextAction { /// A wrapper around a `SharedMemory` reference and a snapshot /// of the memory therein pub struct Snapshot { - /// Unique ID of the sandbox configuration for sandboxes where - /// this snapshot may be restored. - sandbox_id: u64, /// Layout object for the sandbox. TODO: get rid of this and /// replace with something saner and set up from the guest (early /// on?). 
- /// - /// Not checked on restore, since any sandbox with the same - /// configuration id will share the same layout layout: crate::mem::layout::SandboxMemoryLayout, /// Memory of the sandbox at the time this snapshot was taken memory: ReadonlySharedMemory, @@ -376,7 +367,6 @@ impl Snapshot { let extra_regions = Vec::new(); Ok(Self { - sandbox_id: SANDBOX_CONFIGURATION_COUNTER.fetch_add(1, Ordering::Relaxed), memory: ReadonlySharedMemory::from_bytes(&memory, layout.snapshot_size)?, layout, regions: extra_regions, @@ -403,7 +393,6 @@ impl Snapshot { pub(crate) fn new( shared_mem: &mut SnapshotSharedMemory, scratch_mem: &mut S, - sandbox_id: u64, mut layout: SandboxMemoryLayout, load_info: LoadInfo, regions: Vec, @@ -568,7 +557,6 @@ impl Snapshot { let regions: Vec = Vec::new(); Ok(Self { - sandbox_id, layout, memory: ReadonlySharedMemory::from_bytes(&memory, guest_visible_size)?, regions, @@ -586,11 +574,6 @@ impl Snapshot { self.snapshot_generation } - /// The id of the sandbox this snapshot was taken from. - pub(crate) fn sandbox_id(&self) -> u64 { - self.sandbox_id - } - /// Get the mapped regions from this snapshot pub(crate) fn regions(&self) -> &[MemoryRegion] { &self.regions @@ -640,7 +623,10 @@ impl Snapshot { /// A snapshot with no recorded host functions (e.g. one /// produced by a test-only constructor) accepts any `provided` /// set. - pub(crate) fn validate_host_functions(&self, provided: &crate::HostFunctions) -> Result<()> { + pub(crate) fn validate_host_functions( + &self, + provided: &crate::sandbox::host_funcs::FunctionRegistry, + ) -> Result<()> { let required = match &self.host_functions.host_functions { Some(v) => v, None => return Ok(()), @@ -653,7 +639,7 @@ impl Snapshot { let mut signature_mismatches: Vec = Vec::new(); for req in required { - match provided.inner().function_signature(&req.function_name) { + match provided.function_signature(&req.function_name) { // Function name is absent from the provided registry. 
None => missing.push(req.function_name.clone()), // Function exists, but signature does not match. @@ -684,11 +670,30 @@ impl Snapshot { return Ok(()); } - Err(crate::new_error!( - "snapshot host function mismatch: missing={:?}, signature_mismatches={:?}", + Err(crate::HyperlightError::SnapshotHostFunctionMismatch { missing, - signature_mismatches - )) + signature_mismatches, + }) + } + + /// Validate that this snapshot can be applied to a sandbox with + /// the given memory layout and host-function registry. + /// + /// The layout must be structurally compatible with the snapshot's + /// layout (see + /// [`SandboxMemoryLayout::is_compatible_with`](crate::mem::layout::SandboxMemoryLayout::is_compatible_with)), + /// and the registry must be a superset of the host functions the + /// snapshot requires (see + /// [`validate_host_functions`](Self::validate_host_functions)). + pub(crate) fn validate_compatibility( + &self, + layout: &crate::mem::layout::SandboxMemoryLayout, + host_funcs: &crate::sandbox::host_funcs::FunctionRegistry, + ) -> Result<()> { + if !self.layout().is_compatible_with(layout) { + return Err(crate::HyperlightError::SnapshotLayoutMismatch); + } + self.validate_host_functions(host_funcs) } } @@ -760,7 +765,6 @@ mod tests { let snapshot_a = super::Snapshot::new( &mut make_simple_pt_mem(&pattern_a).build().0, &mut mgr.scratch_mem, - 1, mgr.layout, LoadInfo::dummy(), Vec::new(), @@ -778,7 +782,6 @@ mod tests { let snapshot_b = super::Snapshot::new( &mut make_simple_pt_mem(&pattern_b).build().0, &mut mgr.scratch_mem, - 2, mgr.layout, LoadInfo::dummy(), Vec::new(), From 921c51e9314b81d28be765d75e71c0ce67877bfe Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 16:01:13 -0700 Subject: [PATCH 2/5] Add OCI snapshot persistence Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- Cargo.lock | 186 +++ src/hyperlight_host/Cargo.toml | 5 +- 
src/hyperlight_host/src/mem/shared_mem.rs | 1 - .../src/sandbox/initialized_multi_use.rs | 14 + .../src/sandbox/snapshot/file.rs | 1390 +++++++++++++++++ .../src/sandbox/snapshot/mod.rs | 3 + 6 files changed, 1597 insertions(+), 2 deletions(-) create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file.rs diff --git a/Cargo.lock b/Cargo.lock index 62280bbbb..6bce49fc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -530,6 +530,27 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "const_format" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e" +dependencies = [ + "const_format_proc_macros", + "konst", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -730,6 +751,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "derive_arbitrary" version = "1.4.2" @@ -741,6 +797,37 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "digest" version = "0.10.7" @@ -1154,6 +1241,18 @@ dependencies = [ "wasip3", ] +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "gimli" version = "0.33.0" @@ -1361,6 +1460,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.4.0" @@ -1585,6 +1690,7 @@ dependencies = [ "gdbstub", "gdbstub_arch", "goblin", + "hex", "hyperlight-common", 
"hyperlight-component-macro", "hyperlight-guest-tracing", @@ -1600,6 +1706,7 @@ dependencies = [ "metrics-util", "mshv-bindings", "mshv-ioctls", + "oci-spec", "opentelemetry", "opentelemetry-otlp", "opentelemetry-semantic-conventions", @@ -1612,6 +1719,7 @@ dependencies = [ "serde", "serde_json", "serial_test", + "sha2", "signal-hook-registry", "tempfile", "termcolor", @@ -1790,6 +1898,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -1913,6 +2027,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "konst" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb" +dependencies = [ + "konst_macro_rules", +] + +[[package]] +name = "konst_macro_rules" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" + [[package]] name = "kurbo" version = "0.11.3" @@ -2333,6 +2462,23 @@ dependencies = [ "ruzstd", ] +[[package]] +name = "oci-spec" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3da52b83ce3258fbf29f66ac784b279453c2ac3c22c5805371b921ede0d308" +dependencies = [ + "const_format", + "derive_builder", + "getset", + "regex", + "serde", + "serde_json", + "strum", + "strum_macros", + "thiserror", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -2781,6 +2927,28 @@ dependencies = [ "toml_edit", ] +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -3433,6 +3601,24 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "syn" version = "2.0.117" diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 2c19e4082..a774b5786 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -49,9 +49,13 @@ thiserror = "2.0.18" chrono = { version = "0.4", optional = true } anyhow = "1.0" metrics = "0.24.6" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" elfcore = { version = "2.0", optional = true } uuid = { version = "1.23.2", features = ["v4"] } +oci-spec = { version = "0.8", default-features = false, features = ["image"] } +sha2 = "0.10" +hex = "0.4" [target.'cfg(windows)'.dependencies] windows = { version = "0.62", features = [ @@ -83,7 +87,6 @@ mshv-ioctls = { version = "0.6", optional = true} [dev-dependencies] uuid = { version = "1.23.2", features = ["v4"] } signal-hook-registry = "1.4.8" 
-serde = "1.0" iced-x86 = { version = "1.21", default-features = false, features = ["std", "code_asm"] } proptest = "1.11.0" tempfile = "3.27.0" diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index f7a8cb3a5..41a824c7e 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -1568,7 +1568,6 @@ impl ReadonlySharedMemory { /// The file's length must be a non-zero multiple of `PAGE_SIZE`. /// `guest_mapped_size` must be a non-zero multiple of `PAGE_SIZE` /// no greater than the file's length. - #[cfg_attr(not(test), expect(dead_code))] pub(crate) fn from_file(file: &std::fs::File, guest_mapped_size: usize) -> Result { let len: usize = file .metadata() diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index f411bf0e9..380f9b0bb 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -185,6 +185,20 @@ impl MultiUseSandbox { /// # Ok(()) /// # } /// ``` + /// + /// From a snapshot loaded from disk: + /// + /// ```no_run + /// # use std::sync::Arc; + /// # use hyperlight_host::{HostFunctions, MultiUseSandbox}; + /// # use hyperlight_host::sandbox::snapshot::Snapshot; + /// # fn example() -> Result<(), Box> { + /// let snapshot = Arc::new(Snapshot::from_oci("./guest_snapshot", "latest")?); + /// let mut sandbox = MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None)?; + /// let result: String = sandbox.call("Echo", "hello".to_string())?; + /// # Ok(()) + /// # } + /// ``` #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] pub fn from_snapshot( snapshot: Arc, diff --git a/src/hyperlight_host/src/sandbox/snapshot/file.rs b/src/hyperlight_host/src/sandbox/snapshot/file.rs new file mode 100644 index 000000000..7a4588b02 --- /dev/null +++ 
b/src/hyperlight_host/src/sandbox/snapshot/file.rs @@ -0,0 +1,1390 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! On-disk snapshot format: an OCI Image Layout directory. +//! +//! Layout produced by `Snapshot::to_oci`: +//! +//! ```text +//! path/ +//! oci-layout {"imageLayoutVersion":"1.0.0"} +//! index.json one descriptor per tagged snapshot, +//! each tagged via the OCI standard +//! `org.opencontainers.image.ref.name` +//! annotation +//! blobs/sha256/ +//! OCI image manifest JSON +//! Hyperlight config JSON +//! raw memory bytes +//! (`memory_size` bytes) +//! ``` +//! +//! ## Multiple snapshots per layout +//! +//! A single layout directory can hold any number of snapshots, each +//! addressed by tag. Adding a snapshot to an existing layout is the +//! point of OCI Image Layout: blobs are content-addressed by sha256, +//! so two snapshots that share bytes (a common base, an identical +//! config, and so on) share files in `blobs/sha256/`. This is how +//! the format dovetails with `oras cp` / registry pull-push pipelines +//! that move many tagged snapshots through one store. +//! +//! ## Strictness boundary +//! +//! The Hyperlight config blob (`HlConfig` and friends) uses +//! `#[serde(deny_unknown_fields)]` everywhere: any unknown key is a +//! breaking media-type bump, by design. +//! +//! The OCI manifest and index JSON are parsed via `oci-spec`'s +//! `ImageManifest` / `ImageIndex`, which do **not** use +//! 
`deny_unknown_fields`. This is intentional: third-party tools +//! (cosign, ORAS, build pipelines) routinely add manifest- and +//! index-level annotations, and a sandbox image must continue to +//! load even when those extras are present. +//! +//! ## Snapshot blob layout +//! +//! The snapshot blob is the raw memory image: exactly +//! `memory_size` bytes. Guard pages live in the host mapping +//! around the blob: +//! +//! * On Linux, an anonymous `PROT_NONE` reservation surrounds a +//! `MAP_FIXED` file mapping in the middle. +//! * On Windows, a `VirtualAlloc2` placeholder reservation surrounds +//! a `MapViewOfFile3` view in the middle, with guard pages held as +//! reserved (but unmapped) address space. +//! +//! The blob is byte-identical across platforms, so sharing it +//! through OCI registries works unchanged. + +use std::io::{Read, Seek, SeekFrom, Write}; +use std::path::Path; + +use hyperlight_common::flatbuffer_wrappers::function_types::{ParameterType, ReturnType}; +use hyperlight_common::flatbuffer_wrappers::host_function_definition::HostFunctionDefinition; +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; +use hyperlight_common::vmem::PAGE_SIZE; +use oci_spec::image::{ + DescriptorBuilder, Digest, ImageIndex, ImageIndexBuilder, ImageManifest, ImageManifestBuilder, + MediaType, SCHEMA_VERSION, +}; +use serde::{Deserialize, Serialize}; +use sha2::{Digest as _, Sha256}; + +use super::{NextAction, Snapshot}; +use crate::hypervisor::regs::{CommonSegmentRegister, CommonSpecialRegisters, CommonTableRegister}; +use crate::mem::layout::SandboxMemoryLayout; +use crate::mem::memory_region::MemoryRegionFlags; +use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; + +// --- Constants ------------------------------------------------------ + +const OCI_LAYOUT_VERSION: &str = "1.0.0"; + +// Media types are versioned by suffix. 
The loader matches each +// version specifically (no `_CURRENT` shortcut on the read side); the +// writer always emits `_CURRENT`. A new version is added by: +// +// 1. Declare `MT_FOO_V2` next to `MT_FOO_V1`. +// 2. Point `MT_FOO_CURRENT` at `MT_FOO_V2`. +// 3. Add a dispatch arm in the loader that converts v1 -> v2 (or +// rejects v1 if no compatibility window is offered). +const MT_CONFIG_V1: &str = "application/vnd.hyperlight.sandbox.config.v1+json"; +const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; +const MT_SNAPSHOT_V1: &str = "application/vnd.hyperlight.snapshot.v1"; +const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; + +/// ABI version for the snapshot memory blob. Bumped whenever the +/// host-guest contract for the bytes inside the snapshot blob changes +/// (PEB layout, calling convention, init state, etc.). Independent of +/// the config blob's media-type version. +const SNAPSHOT_ABI_VERSION: u32 = 1; + +/// Maximum size of the config JSON blob. Bounds the allocation done +/// before we parse the JSON. +const MAX_CONFIG_BLOB_SIZE: u64 = 1024 * 1024; + +impl Snapshot { + /// Save this snapshot as a tagged manifest inside an OCI Image + /// Layout directory at `path`. + /// + /// `tag` is written to `index.json` as + /// `org.opencontainers.image.ref.name` and must satisfy the OCI + /// tag grammar (`[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`). + /// + /// If `path` does not exist, a new OCI layout is created. If it + /// exists, it must already be a valid OCI layout. Existing blobs + /// are deduplicated by digest. Existing tags are not overwritten. + /// + /// Save is not crash-atomic. If interrupted, delete the layout and + /// re-save. + /// + /// # Portability + /// + /// Snapshot images are not portable across CPU architectures, + /// hypervisors, or operating systems. All three are recorded in + /// the config blob and checked at load time; mismatches are + /// rejected with a clear error. 
+ pub fn to_oci(&self, path: impl AsRef, tag: &str) -> crate::Result<()> { + let path = path.as_ref(); + validate_tag(tag)?; + + // Decide whether we are creating a fresh layout or appending + // to an existing one. + let appending = match std::fs::symlink_metadata(path) { + Ok(meta) if meta.is_dir() => { + // Directory exists. It must be a valid (empty or + // populated) OCI layout, otherwise we refuse to + // touch it. An empty directory is treated as + // not-a-layout to avoid quietly turning user data + // into a snapshot store. + if !read_oci_layout_marker(path)? { + return Err(crate::new_error!( + "to_oci refusing to write: {:?} exists but is not an OCI image layout \ + (no `oci-layout` marker). Remove it or choose a different path.", + path + )); + } + true + } + Ok(_) => { + return Err(crate::new_error!( + "to_oci refusing to write: path {:?} exists but is not a directory", + path + )); + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, + Err(e) => { + return Err(crate::new_error!("to_oci failed to stat {:?}: {}", path, e)); + } + }; + + // Load the existing index (if any) so we can detect a tag + // collision before doing any work. + let mut existing_manifests: Vec = if appending { + let idx = ImageIndex::from_file(path.join("index.json")).map_err(|e| { + crate::new_error!( + "to_oci: failed to read existing index.json at {:?}: {}", + path.join("index.json"), + e + ) + })?; + if let Some(existing) = idx.manifests().iter().find(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str() == tag) + .unwrap_or(false) + }) { + return Err(crate::new_error!( + "to_oci refusing to overwrite tag {:?} in {:?} (existing digest {}). 
\ + Delete the tag first or use a different name.", + tag, + path, + existing.digest().as_ref() + )); + } + idx.manifests().clone() + } else { + Vec::new() + }; + + let blobs_dir = path.join("blobs").join("sha256"); + std::fs::create_dir_all(&blobs_dir) + .map_err(|e| crate::new_error!("failed to create OCI layout dir {:?}: {}", path, e))?; + + // 1. Snapshot blob: the raw memory bytes. Stream sha256 + // hasher over the bytes as we write, then rename to the + // digest filename. If a blob with the same digest + // already exists in this layout (another snapshot shares + // it), discard the temp file. + let memory_bytes = self.memory.as_slice(); + let memory_size = memory_bytes.len(); + if memory_size == 0 || memory_size % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot memory size {} must be a non-zero multiple of PAGE_SIZE", + memory_size + )); + } + let blob_total = memory_size; + + let snapshot_digest = { + let tmp_path = blobs_dir.join(".tmp-snapshot"); + let mut f = std::fs::File::create(&tmp_path).map_err(|e| { + crate::new_error!("failed to create snapshot blob temp {:?}: {}", tmp_path, e) + })?; + let mut hasher = Sha256::new(); + + f.write_all(memory_bytes) + .map_err(|e| crate::new_error!("snapshot blob write error: {}", e))?; + hasher.update(memory_bytes); + + let digest = Digest256::from_hasher(hasher); + let final_path = blobs_dir.join(&digest.hex); + if final_path.exists() { + // Same content already on disk, contributed by an + // earlier tag. Discard the temp file. + let _ = std::fs::remove_file(&tmp_path); + } else { + std::fs::rename(&tmp_path, &final_path).map_err(|e| { + crate::new_error!( + "failed to rename snapshot blob {:?} -> {:?}: {}", + tmp_path, + final_path, + e + ) + })?; + } + digest + }; + + // 2. Config blob. 
+ let cfg = self.build_config()?; + let cfg_bytes = serde_json::to_vec_pretty(&cfg) + .map_err(|e| crate::new_error!("failed to serialise config JSON: {}", e))?; + let cfg_digest = Digest256::from_bytes(&cfg_bytes); + write_blob_dedup(&blobs_dir, &cfg_digest.hex, &cfg_bytes)?; + + // 3. Manifest. + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::Other(MT_CONFIG_CURRENT.to_string())) + .digest(oci_digest(&cfg_digest)?) + .size(cfg_bytes.len() as u64) + .build() + .map_err(|e| crate::new_error!("failed to build config descriptor: {}", e))?; + let snapshot_descriptor = DescriptorBuilder::default() + .media_type(MediaType::Other(MT_SNAPSHOT_CURRENT.to_string())) + .digest(oci_digest(&snapshot_digest)?) + .size(blob_total as u64) + .build() + .map_err(|e| crate::new_error!("failed to build snapshot descriptor: {}", e))?; + let manifest = ImageManifestBuilder::default() + .schema_version(SCHEMA_VERSION) + .media_type(MediaType::ImageManifest) + .config(config_descriptor) + .layers(vec![snapshot_descriptor]) + .build() + .map_err(|e| crate::new_error!("failed to build OCI manifest: {}", e))?; + let manifest_bytes = serde_json::to_vec_pretty(&manifest) + .map_err(|e| crate::new_error!("failed to serialise OCI manifest: {}", e))?; + let manifest_digest = Digest256::from_bytes(&manifest_bytes); + write_blob_dedup(&blobs_dir, &manifest_digest.hex, &manifest_bytes)?; + + // 4. Append manifest descriptor (with the tag annotation) + // to index.json. + let mut anns = std::collections::HashMap::new(); + anns.insert(ANNOTATION_REF_NAME.to_string(), tag.to_string()); + let manifest_descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageManifest) + .digest(oci_digest(&manifest_digest)?) 
+ .size(manifest_bytes.len() as u64) + .annotations(anns) + .build() + .map_err(|e| crate::new_error!("failed to build manifest descriptor: {}", e))?; + existing_manifests.push(manifest_descriptor); + let index = ImageIndexBuilder::default() + .schema_version(SCHEMA_VERSION) + .media_type(MediaType::ImageIndex) + .manifests(existing_manifests) + .build() + .map_err(|e| crate::new_error!("failed to build OCI index: {}", e))?; + let index_bytes = serde_json::to_vec_pretty(&index) + .map_err(|e| crate::new_error!("failed to serialise OCI index: {}", e))?; + std::fs::write(path.join("index.json"), &index_bytes) + .map_err(|e| crate::new_error!("failed to write index.json: {}", e))?; + + // 5. oci-layout marker (idempotent: same content every time). + if !appending { + let layout_bytes = serde_json::to_vec(&serde_json::json!({ + "imageLayoutVersion": OCI_LAYOUT_VERSION, + })) + .map_err(|e| crate::new_error!("failed to serialise oci-layout: {}", e))?; + std::fs::write(path.join("oci-layout"), &layout_bytes) + .map_err(|e| crate::new_error!("failed to write oci-layout: {}", e))?; + } + + Ok(()) + } + + fn build_config(&self) -> crate::Result { + let entrypoint = match (self.entrypoint, self.sregs.as_ref()) { + (NextAction::Initialise(addr), None) => EntrypointRepr::Initialise { addr }, + (NextAction::Call(addr), Some(sregs)) => EntrypointRepr::Call { + addr, + sregs: Box::new(SregsRepr::from(sregs)), + }, + (NextAction::Initialise(_), Some(_)) => { + return Err(crate::new_error!( + "snapshot inconsistent: Initialise entrypoint must not have sregs" + )); + } + (NextAction::Call(_), None) => { + return Err(crate::new_error!( + "snapshot inconsistent: Call entrypoint must have sregs" + )); + } + #[cfg(test)] + (NextAction::None, _) => { + return Err(crate::new_error!( + "snapshot with NextAction::None cannot be persisted" + )); + } + }; + + let host_functions = match &self.host_functions.host_functions { + Some(v) => v.iter().map(HostFunctionRepr::from).collect(), + None => 
Vec::new(), + }; + + let l = &self.layout; + Ok(HlConfig { + hyperlight_version: env!("CARGO_PKG_VERSION").to_string(), + arch: ArchTag::current(), + abi_version: SNAPSHOT_ABI_VERSION, + hypervisor: HypervisorTag::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to tag snapshot"))?, + stack_top_gva: self.stack_top_gva, + entrypoint, + layout: LayoutFields { + input_data_size: l.input_data_size, + output_data_size: l.output_data_size, + heap_size: l.heap_size, + code_size: l.code_size, + init_data_size: l.init_data_size, + init_data_permissions: l.init_data_permissions.map(|f| f.bits()), + scratch_size: l.get_scratch_size(), + snapshot_size: l.snapshot_size, + pt_size: l.pt_size, + }, + memory_size: self.memory.mem_size() as u64, + host_functions, + }) + } + + /// Load the snapshot tagged `tag` from an OCI Image Layout + /// directory at `path`. + /// + /// `tag` selects one manifest from `index.json` using + /// `org.opencontainers.image.ref.name`. Missing tags and duplicate + /// tags are rejected. + /// + /// This verifies sha256 for manifest, config, and snapshot blobs. + /// Use [`Snapshot::from_oci_unchecked`] to skip digest verification + /// in trusted paths. + /// + /// Returns an error for arch, hypervisor, OS, and ABI mismatches. + /// + /// # File-mutation hazard + /// + /// Do not modify or replace files in `path` while the returned + /// `Snapshot` (or sandboxes built from it) is still alive. + pub fn from_oci(path: impl AsRef, tag: &str) -> crate::Result { + Self::from_oci_inner(path.as_ref(), tag, true) + } + + /// Like [`Snapshot::from_oci`] but **skips sha256 verification of + /// the manifest, config, and snapshot blob bytes**, trading + /// integrity checking for performance. All other validation + /// (OCI structure, descriptor sizes, schema versions, arch / + /// hypervisor / ABI tags, layout bounds, entrypoint bounds) is + /// unchanged. 
+ pub fn from_oci_unchecked(path: impl AsRef, tag: &str) -> crate::Result { + Self::from_oci_inner(path.as_ref(), tag, false) + } + + fn from_oci_inner(path: &Path, tag: &str, verify_blobs: bool) -> crate::Result { + validate_tag(tag)?; + let meta = std::fs::metadata(path) + .map_err(|e| crate::new_error!("from_oci failed to stat {:?}: {}", path, e))?; + if !meta.is_dir() { + return Err(crate::new_error!( + "from_oci path {:?} is not a directory", + path + )); + } + + // 1. oci-layout + let layout_bytes = std::fs::read(path.join("oci-layout")).map_err(|e| { + crate::new_error!( + "missing or unreadable oci-layout at {:?}: {}", + path.join("oci-layout"), + e + ) + })?; + let layout_json: serde_json::Value = serde_json::from_slice(&layout_bytes) + .map_err(|e| crate::new_error!("oci-layout is not valid JSON: {}", e))?; + let v = layout_json + .get("imageLayoutVersion") + .and_then(|v| v.as_str()) + .ok_or_else(|| crate::new_error!("oci-layout missing imageLayoutVersion field"))?; + if v != OCI_LAYOUT_VERSION { + return Err(crate::new_error!( + "unsupported OCI image layout version {:?} (expected {:?})", + v, + OCI_LAYOUT_VERSION + )); + } + + // 2. index.json -> manifest descriptor for `tag`. Multiple + // manifests are fine in OCI Image Layout; we select the + // one whose `org.opencontainers.image.ref.name` annotation + // matches the requested tag. Two manifests with the same + // tag is a malformed layout. 
+ let index = ImageIndex::from_file(path.join("index.json")) + .map_err(|e| crate::new_error!("failed to read or parse index.json: {}", e))?; + let mut matching = index.manifests().iter().filter(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str() == tag) + .unwrap_or(false) + }); + let manifest_desc = match (matching.next(), matching.next()) { + (None, _) => { + let known: Vec<&str> = index + .manifests() + .iter() + .filter_map(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str()) + }) + .collect(); + return Err(crate::new_error!( + "no manifest tagged {:?} in OCI layout {:?}. Available tags: {:?}", + tag, + path, + known + )); + } + (Some(_), Some(_)) => { + return Err(crate::new_error!( + "OCI layout {:?} has multiple manifests tagged {:?}; tags must be unique", + path, + tag + )); + } + (Some(d), None) => d, + }; + let manifest_hex = parse_oci_digest(manifest_desc.digest().as_ref())?; + + // 3. manifest blob + let manifest_path = path.join("blobs").join("sha256").join(&manifest_hex); + let manifest_bytes = read_bounded(&manifest_path, MAX_CONFIG_BLOB_SIZE)?; + if manifest_bytes.len() as u64 != manifest_desc.size() { + return Err(crate::new_error!( + "OCI manifest size mismatch: descriptor says {}, file is {}", + manifest_desc.size(), + manifest_bytes.len() + )); + } + if verify_blobs { + verify_blob_bytes("manifest", &manifest_bytes, &manifest_hex)?; + } + let manifest: ImageManifest = serde_json::from_slice(&manifest_bytes) + .map_err(|e| crate::new_error!("failed to parse OCI manifest JSON: {}", e))?; + if manifest.schema_version() != SCHEMA_VERSION { + return Err(crate::new_error!( + "unsupported OCI manifest schemaVersion {} (expected {})", + manifest.schema_version(), + SCHEMA_VERSION + )); + } + let cfg_desc = manifest.config(); + // Loader dispatch on config media type. 
Today only v1 exists; + // v2 lands as a new arm here that converts to the in-memory + // current shape. + let cfg_media = cfg_desc.media_type().to_string(); + match cfg_media.as_str() { + MT_CONFIG_V1 => {} + other => { + return Err(crate::new_error!( + "unexpected config media type {:?} (supported: {:?})", + other, + MT_CONFIG_V1 + )); + } + } + let layers = manifest.layers(); + if layers.len() != 1 { + return Err(crate::new_error!( + "expected exactly one OCI layer (the snapshot), found {}", + layers.len() + )); + } + let snap_desc = &layers[0]; + let snap_media = snap_desc.media_type().to_string(); + match snap_media.as_str() { + MT_SNAPSHOT_V1 => {} + other => { + return Err(crate::new_error!( + "unexpected snapshot layer media type {:?} (supported: {:?})", + other, + MT_SNAPSHOT_V1 + )); + } + } + + // 4. config blob + let cfg_hex = parse_oci_digest(cfg_desc.digest().as_ref())?; + let cfg_path = path.join("blobs").join("sha256").join(&cfg_hex); + let cfg_bytes = read_bounded(&cfg_path, MAX_CONFIG_BLOB_SIZE)?; + if cfg_bytes.len() as u64 != cfg_desc.size() { + return Err(crate::new_error!( + "config blob size mismatch: descriptor says {}, file is {}", + cfg_desc.size(), + cfg_bytes.len() + )); + } + if verify_blobs { + verify_blob_bytes("config", &cfg_bytes, &cfg_hex)?; + } + let cfg: HlConfig = serde_json::from_slice(&cfg_bytes) + .map_err(|e| crate::new_error!("failed to parse Hyperlight config JSON: {}", e))?; + cfg.validate_for_load()?; + + // 5. snapshot blob: open once, hash and mmap the same + // handle so an attacker cannot swap the file between + // verification and mapping. 
+ let snap_hex = parse_oci_digest(snap_desc.digest().as_ref())?; + let snap_path = path.join("blobs").join("sha256").join(&snap_hex); + let mut snap_file = std::fs::File::open(&snap_path).map_err(|e| { + crate::new_error!("failed to open snapshot blob {:?}: {}", snap_path, e) + })?; + let snap_file_len = snap_file + .metadata() + .map_err(|e| crate::new_error!("failed to stat snapshot blob: {}", e))? + .len(); + let expected_blob_len = cfg.memory_size; + if snap_file_len != expected_blob_len { + return Err(crate::new_error!( + "snapshot blob size mismatch: file is {} bytes, expected {} \ + (memory_size)", + snap_file_len, + expected_blob_len, + )); + } + if snap_file_len != snap_desc.size() { + return Err(crate::new_error!( + "snapshot blob size {} disagrees with OCI descriptor size {}", + snap_file_len, + snap_desc.size() + )); + } + if verify_blobs { + verify_blob_file("snapshot", &mut snap_file, &snap_hex)?; + } + + // 6. Reconstruct layout. + let mut sbox_cfg = crate::sandbox::SandboxConfiguration::default(); + sbox_cfg.set_input_data_size(cfg.layout.input_data_size); + sbox_cfg.set_output_data_size(cfg.layout.output_data_size); + sbox_cfg.set_heap_size(cfg.layout.heap_size as u64); + sbox_cfg.set_scratch_size(cfg.layout.scratch_size); + let init_data_perms = match cfg.layout.init_data_permissions { + None => None, + Some(bits) => Some(MemoryRegionFlags::from_bits(bits).ok_or_else(|| { + crate::new_error!( + "snapshot init_data_permissions {:#x} contains unknown flag bits", + bits + ) + })?), + }; + let mut layout = SandboxMemoryLayout::new( + sbox_cfg, + cfg.layout.code_size, + cfg.layout.init_data_size, + init_data_perms, + )?; + // `snapshot_size` and `pt_size` are independent fields. + if let Some(pt) = cfg.layout.pt_size { + layout.set_pt_size(pt)?; + } + layout.set_snapshot_size(cfg.layout.snapshot_size); + + // 7. mmap the snapshot blob (file-backed CoW). The blob is + // the raw memory image. 
`ReadonlySharedMemory::from_file` + // surrounds it with host guard pages. The guest mapping + // of the snapshot region covers only the data prefix + // (`snapshot_size`). The PT tail sits past that prefix + // in the host mapping and is copied into the scratch + // region on restore. Keeping it out of the guest mapping + // of the snapshot region avoids overlap with + // `map_file_cow` regions installed immediately after the + // snapshot in guest PA space. + let memory = ReadonlySharedMemory::from_file(&snap_file, layout.snapshot_size)?; + + // 8. Build entrypoint + sregs back from the tagged enum. + let (entrypoint, sregs) = match cfg.entrypoint { + EntrypointRepr::Initialise { addr } => (NextAction::Initialise(addr), None), + EntrypointRepr::Call { addr, sregs } => ( + NextAction::Call(addr), + Some(CommonSpecialRegisters::from(*sregs)), + ), + }; + + // 9. Reconstitute host_functions metadata. + let host_funcs_vec: Vec = + cfg.host_functions.into_iter().map(Into::into).collect(); + let host_functions = if host_funcs_vec.is_empty() { + HostFunctionDetails { + host_functions: None, + } + } else { + HostFunctionDetails { + host_functions: Some(host_funcs_vec), + } + }; + + Ok(Snapshot { + layout, + memory, + regions: Vec::new(), + load_info: crate::mem::exe::LoadInfo::dummy(), + stack_top_gva: cfg.stack_top_gva, + sregs, + entrypoint, + snapshot_generation: 0, + host_functions, + }) + } +} + +// --- Hypervisor / arch tags ----------------------------------------- + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +enum ArchTag { + X86_64, + Aarch64, + I686, +} + +impl ArchTag { + fn current() -> Self { + #[cfg(feature = "i686-guest")] + { + Self::I686 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "x86_64"))] + { + Self::X86_64 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "aarch64"))] + { + Self::Aarch64 + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, 
Deserialize)] +#[serde(rename_all = "lowercase")] +pub(super) enum HypervisorTag { + Kvm, + Mshv, + Whp, +} + +impl HypervisorTag { + pub(super) fn current() -> Option { + #[allow(unused_imports)] + use crate::hypervisor::virtual_machine::HypervisorType; + use crate::hypervisor::virtual_machine::get_available_hypervisor; + + match get_available_hypervisor() { + #[cfg(kvm)] + Some(HypervisorType::Kvm) => Some(Self::Kvm), + #[cfg(mshv3)] + Some(HypervisorType::Mshv) => Some(Self::Mshv), + #[cfg(target_os = "windows")] + Some(HypervisorType::Whp) => Some(Self::Whp), + None => None, + } + } + + fn name(&self) -> &'static str { + match self { + Self::Kvm => "KVM", + Self::Mshv => "MSHV", + Self::Whp => "WHP", + } + } +} + +// --- Config JSON shape ---------------------------------------------- + +/// Top-level Hyperlight sandbox config JSON. Lives at +/// `blobs/sha256/` with media type +/// `application/vnd.hyperlight.sandbox.config.v1+json`. +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct HlConfig { + /// Hyperlight crate version that produced this config. Recorded + /// for diagnostics. Not checked on load. + hyperlight_version: String, + arch: ArchTag, + /// Memory blob ABI version. See [`SNAPSHOT_ABI_VERSION`]. + abi_version: u32, + hypervisor: HypervisorTag, + /// Top of the guest stack, in guest virtual address space. + stack_top_gva: u64, + /// Tagged enum: `Initialise` carries an entry-point address only; + /// `Call` carries the dispatch function pointer plus the captured + /// sregs from the running vCPU. The shape itself enforces the + /// "Call has sregs, Initialise does not" invariant. + entrypoint: EntrypointRepr, + layout: LayoutFields, + /// Total size of the memory blob in bytes (including the guest + /// page-table tail, if any). Equal to `self.memory.mem_size()`. + memory_size: u64, + /// Names and signatures of host functions registered when this + /// snapshot was taken. Validated against the loader's registry. 
+ host_functions: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "lowercase", deny_unknown_fields)] +/// On-disk next action stored with a snapshot. +/// +/// The enum shape enforces the invariant that `Call` includes `sregs` +/// and `Initialise` does not. Serde rejects missing or extra `sregs` +/// at parse time. +enum EntrypointRepr { + Initialise { addr: u64 }, + Call { addr: u64, sregs: Box }, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct LayoutFields { + input_data_size: usize, + output_data_size: usize, + heap_size: usize, + code_size: usize, + init_data_size: usize, + /// Memory region flag bits. `None` means default permissions. + init_data_permissions: Option, + scratch_size: usize, + snapshot_size: usize, + pt_size: Option, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct HostFunctionRepr { + function_name: String, + parameter_types: Vec, + return_type: ReturnTypeRepr, +} + +/// JSON-friendly mirror of +/// [`hyperlight_common::flatbuffer_wrappers::function_types::ParameterType`]. +/// Kept local so we don't have to plumb serde through `hyperlight_common`. +/// The `match`es below are exhaustive: any new variant upstream forces +/// an explicit decision here. +#[derive(Serialize, Deserialize, Copy, Clone)] +#[serde(rename_all = "snake_case")] +enum ParameterTypeRepr { + Int, + UInt, + Long, + ULong, + Float, + Double, + String, + Bool, + VecBytes, +} + +/// JSON-friendly mirror of +/// [`hyperlight_common::flatbuffer_wrappers::function_types::ReturnType`]. 
+#[derive(Serialize, Deserialize, Copy, Clone)] +#[serde(rename_all = "snake_case")] +enum ReturnTypeRepr { + Int, + UInt, + Long, + ULong, + Float, + Double, + String, + Bool, + Void, + VecBytes, +} + +impl From<&ParameterType> for ParameterTypeRepr { + fn from(p: &ParameterType) -> Self { + match p { + ParameterType::Int => Self::Int, + ParameterType::UInt => Self::UInt, + ParameterType::Long => Self::Long, + ParameterType::ULong => Self::ULong, + ParameterType::Float => Self::Float, + ParameterType::Double => Self::Double, + ParameterType::String => Self::String, + ParameterType::Bool => Self::Bool, + ParameterType::VecBytes => Self::VecBytes, + } + } +} + +impl From for ParameterType { + fn from(r: ParameterTypeRepr) -> Self { + match r { + ParameterTypeRepr::Int => Self::Int, + ParameterTypeRepr::UInt => Self::UInt, + ParameterTypeRepr::Long => Self::Long, + ParameterTypeRepr::ULong => Self::ULong, + ParameterTypeRepr::Float => Self::Float, + ParameterTypeRepr::Double => Self::Double, + ParameterTypeRepr::String => Self::String, + ParameterTypeRepr::Bool => Self::Bool, + ParameterTypeRepr::VecBytes => Self::VecBytes, + } + } +} + +impl From<&ReturnType> for ReturnTypeRepr { + fn from(r: &ReturnType) -> Self { + match r { + ReturnType::Int => Self::Int, + ReturnType::UInt => Self::UInt, + ReturnType::Long => Self::Long, + ReturnType::ULong => Self::ULong, + ReturnType::Float => Self::Float, + ReturnType::Double => Self::Double, + ReturnType::String => Self::String, + ReturnType::Bool => Self::Bool, + ReturnType::Void => Self::Void, + ReturnType::VecBytes => Self::VecBytes, + } + } +} + +impl From for ReturnType { + fn from(r: ReturnTypeRepr) -> Self { + match r { + ReturnTypeRepr::Int => Self::Int, + ReturnTypeRepr::UInt => Self::UInt, + ReturnTypeRepr::Long => Self::Long, + ReturnTypeRepr::ULong => Self::ULong, + ReturnTypeRepr::Float => Self::Float, + ReturnTypeRepr::Double => Self::Double, + ReturnTypeRepr::String => Self::String, + ReturnTypeRepr::Bool => 
Self::Bool, + ReturnTypeRepr::Void => Self::Void, + ReturnTypeRepr::VecBytes => Self::VecBytes, + } + } +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct SregsRepr { + cs: SegmentRegisterRepr, + ds: SegmentRegisterRepr, + es: SegmentRegisterRepr, + fs: SegmentRegisterRepr, + gs: SegmentRegisterRepr, + ss: SegmentRegisterRepr, + tr: SegmentRegisterRepr, + ldt: SegmentRegisterRepr, + gdt: TableRegisterRepr, + idt: TableRegisterRepr, + cr0: u64, + cr2: u64, + cr3: u64, + cr4: u64, + cr8: u64, + efer: u64, + apic_base: u64, + interrupt_bitmap: [u64; 4], +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct SegmentRegisterRepr { + base: u64, + limit: u32, + selector: u16, + type_: u8, + present: u8, + dpl: u8, + db: u8, + s: u8, + l: u8, + g: u8, + avl: u8, + unusable: u8, + padding: u8, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct TableRegisterRepr { + base: u64, + limit: u16, +} + +// --- Conversions between repr and runtime types --------------------- + +impl From<&CommonSpecialRegisters> for SregsRepr { + fn from(s: &CommonSpecialRegisters) -> Self { + let seg = |r: &CommonSegmentRegister| SegmentRegisterRepr { + base: r.base, + limit: r.limit, + selector: r.selector, + type_: r.type_, + present: r.present, + dpl: r.dpl, + db: r.db, + s: r.s, + l: r.l, + g: r.g, + avl: r.avl, + unusable: r.unusable, + padding: r.padding, + }; + let tab = |r: &CommonTableRegister| TableRegisterRepr { + base: r.base, + limit: r.limit, + }; + Self { + cs: seg(&s.cs), + ds: seg(&s.ds), + es: seg(&s.es), + fs: seg(&s.fs), + gs: seg(&s.gs), + ss: seg(&s.ss), + tr: seg(&s.tr), + ldt: seg(&s.ldt), + gdt: tab(&s.gdt), + idt: tab(&s.idt), + cr0: s.cr0, + cr2: s.cr2, + cr3: s.cr3, + cr4: s.cr4, + cr8: s.cr8, + efer: s.efer, + apic_base: s.apic_base, + interrupt_bitmap: s.interrupt_bitmap, + } + } +} + +impl From for CommonSpecialRegisters { + fn from(r: SregsRepr) -> Self { + let seg = |s: 
SegmentRegisterRepr| CommonSegmentRegister { + base: s.base, + limit: s.limit, + selector: s.selector, + type_: s.type_, + present: s.present, + dpl: s.dpl, + db: s.db, + s: s.s, + l: s.l, + g: s.g, + avl: s.avl, + unusable: s.unusable, + padding: s.padding, + }; + let tab = |t: TableRegisterRepr| CommonTableRegister { + base: t.base, + limit: t.limit, + }; + Self { + cs: seg(r.cs), + ds: seg(r.ds), + es: seg(r.es), + fs: seg(r.fs), + gs: seg(r.gs), + ss: seg(r.ss), + tr: seg(r.tr), + ldt: seg(r.ldt), + gdt: tab(r.gdt), + idt: tab(r.idt), + cr0: r.cr0, + cr2: r.cr2, + cr3: r.cr3, + cr4: r.cr4, + cr8: r.cr8, + efer: r.efer, + apic_base: r.apic_base, + interrupt_bitmap: r.interrupt_bitmap, + } + } +} + +impl From<&HostFunctionDefinition> for HostFunctionRepr { + fn from(d: &HostFunctionDefinition) -> Self { + let parameter_types = d + .parameter_types + .as_ref() + .map(|v| v.iter().map(ParameterTypeRepr::from).collect()) + .unwrap_or_default(); + Self { + function_name: d.function_name.clone(), + parameter_types, + return_type: ReturnTypeRepr::from(&d.return_type), + } + } +} + +impl From for HostFunctionDefinition { + fn from(r: HostFunctionRepr) -> Self { + Self { + function_name: r.function_name, + parameter_types: Some(r.parameter_types.into_iter().map(Into::into).collect()), + return_type: r.return_type.into(), + } + } +} + +// --- sha256 helper -------------------------------------------------- + +/// A `sha256:` digest as recorded in OCI manifests. The bare hex +/// (without prefix) is also the blob's filename inside `blobs/sha256/`. +#[derive(Clone)] +struct Digest256 { + /// Lowercase hex of the 32-byte sha256 output. 
+ hex: String, +} + +impl Digest256 { + fn from_bytes(bytes: &[u8]) -> Self { + let arr: [u8; 32] = Sha256::digest(bytes).into(); + Self { + hex: hex::encode(arr), + } + } + + fn from_hasher(h: Sha256) -> Self { + let arr: [u8; 32] = h.finalize().into(); + Self { + hex: hex::encode(arr), + } + } +} + +/// Build an `oci_spec::image::Digest` from a [`Digest256`]. +fn oci_digest(d: &Digest256) -> crate::Result { + Digest::try_from(format!("sha256:{}", d.hex)) + .map_err(|e| crate::new_error!("failed to construct OCI digest: {}", e)) +} + +fn parse_oci_digest(s: &str) -> crate::Result { + let rest = s.strip_prefix("sha256:").ok_or_else(|| { + crate::new_error!( + "OCI descriptor digest {:?} is not a sha256 digest (only sha256 is supported)", + s + ) + })?; + if rest.len() != 64 || !rest.chars().all(|c| c.is_ascii_hexdigit()) { + return Err(crate::new_error!( + "OCI descriptor digest {:?} is not a 64-character lowercase hex string", + s + )); + } + Ok(rest.to_lowercase()) +} + +// --- HlConfig validation -------------------------------------------- + +impl HlConfig { + fn validate_for_load(&self) -> crate::Result<()> { + if self.arch != ArchTag::current() { + return Err(crate::new_error!( + "snapshot architecture mismatch: file is {:?}, current host is {:?}", + self.arch, + ArchTag::current() + )); + } + if self.abi_version != SNAPSHOT_ABI_VERSION { + return Err(crate::new_error!( + "snapshot ABI version mismatch: file has version {}, this build expects {}. 
\ + The snapshot must be regenerated from the guest binary.", + self.abi_version, + SNAPSHOT_ABI_VERSION + )); + } + let current_hv = HypervisorTag::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to load snapshot"))?; + if self.hypervisor != current_hv { + return Err(crate::new_error!( + "snapshot hypervisor mismatch: file was created on {} but the current hypervisor is {}", + self.hypervisor.name(), + current_hv.name() + )); + } + // Bound memory size early so the subsequent file-size check + // does not have to deal with absurd values. + if self.memory_size == 0 || self.memory_size > SandboxMemoryLayout::MAX_MEMORY_SIZE as u64 { + return Err(crate::new_error!( + "snapshot memory_size ({}) is out of range", + self.memory_size + )); + } + if self.memory_size as usize % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot memory_size ({}) is not a multiple of PAGE_SIZE", + self.memory_size + )); + } + // Invariant: `snapshot_size + pt_size == memory_size`. + // `snapshot_size` is the guest-visible prefix of the blob, + // mapped into guest PA space at `BASE_ADDRESS`. `pt_size` + // is the page-table tail that sits after it in the blob and + // the host mapping, outside the guest mapping of the + // snapshot region. 
+ if self.layout.snapshot_size == 0 { + return Err(crate::new_error!("snapshot snapshot_size must be nonzero")); + } + if self.layout.snapshot_size % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot snapshot_size ({}) is not a multiple of PAGE_SIZE", + self.layout.snapshot_size + )); + } + let pt = self.layout.pt_size.unwrap_or(0); + if pt % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot pt_size ({}) is not a multiple of PAGE_SIZE", + pt + )); + } + if (self.layout.snapshot_size as u64).saturating_add(pt as u64) != self.memory_size { + return Err(crate::new_error!( + "snapshot snapshot_size ({}) + pt_size ({}) does not equal memory_size ({})", + self.layout.snapshot_size, + pt, + self.memory_size + )); + } + if let Some(bits) = self.layout.init_data_permissions { + MemoryRegionFlags::from_bits(bits).ok_or_else(|| { + crate::new_error!( + "snapshot init_data_permissions {:#x} contains unknown flag bits", + bits + ) + })?; + } + + // Entrypoint address must point inside the guest snapshot + // region. Hyperlight identity-maps the snapshot region in low + // GPAs, so the same bounds apply to virtual and physical + // addresses there. A crafted config could otherwise direct + // execution into unmapped GPA space (which only catches the + // bug at vCPU run time) or, worse, into the scratch region + // (which is writable). The bound here is + // `[BASE_ADDRESS, BASE_ADDRESS + snapshot_size)` because the + // snapshot blob covers exactly the snapshot region. + let snap_lo = SandboxMemoryLayout::BASE_ADDRESS as u64; + let snap_hi = snap_lo + .checked_add(self.layout.snapshot_size as u64) + .ok_or_else(|| { + crate::new_error!( + "snapshot layout overflow: BASE_ADDRESS + snapshot_size ({}) does not fit in u64", + self.layout.snapshot_size + ) + })?; + let entry_addr = match &self.entrypoint { + EntrypointRepr::Initialise { addr } => *addr, + EntrypointRepr::Call { addr, .. 
} => *addr, + }; + if entry_addr < snap_lo || entry_addr >= snap_hi { + return Err(crate::new_error!( + "snapshot entrypoint addr {:#x} is outside the snapshot region [{:#x}, {:#x})", + entry_addr, + snap_lo, + snap_hi + )); + } + Ok(()) + } +} + +// --- Save ----------------------------------------------------------- + +/// OCI standard annotation key for a manifest's tag inside an image +/// index. Set on the manifest descriptor in `index.json`, not on the +/// manifest blob itself. See the OCI Image Spec, "Annotations" and +/// the Image Layout spec. +const ANNOTATION_REF_NAME: &str = "org.opencontainers.image.ref.name"; + +/// Validate a tag against the OCI Distribution spec rules: +/// `[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`. Required so that the same +/// strings work both in our local layout and when pushed to a +/// registry via `oras` / `crane` / `skopeo`. +fn validate_tag(tag: &str) -> crate::Result<()> { + let bytes = tag.as_bytes(); + if bytes.is_empty() || bytes.len() > 128 { + return Err(crate::new_error!( + "tag {:?} is invalid: must be 1..=128 bytes", + tag + )); + } + let first = bytes[0]; + if !(first.is_ascii_alphanumeric() || first == b'_') { + return Err(crate::new_error!( + "tag {:?} is invalid: first character must be alphanumeric or '_'", + tag + )); + } + for &b in &bytes[1..] { + if !(b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'-') { + return Err(crate::new_error!( + "tag {:?} is invalid: characters after the first must be \ + alphanumeric or one of '_', '.', '-'", + tag + )); + } + } + Ok(()) +} + +/// Read and parse `path/oci-layout`, asserting the version we +/// support. Returns `Ok(true)` if a valid layout marker was found, +/// `Ok(false)` if the file is absent (so the caller can decide +/// whether to create a fresh layout), or `Err` if the file is +/// present but malformed (treat as user data; refuse to touch). 
+fn read_oci_layout_marker(path: &Path) -> crate::Result { + let marker_path = path.join("oci-layout"); + let bytes = match std::fs::read(&marker_path) { + Ok(b) => b, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(false), + Err(e) => { + return Err(crate::new_error!( + "failed to read oci-layout marker at {:?}: {}", + marker_path, + e + )); + } + }; + let v: serde_json::Value = serde_json::from_slice(&bytes).map_err(|e| { + crate::new_error!("oci-layout at {:?} is not valid JSON: {}", marker_path, e) + })?; + let version = v + .get("imageLayoutVersion") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + crate::new_error!("oci-layout at {:?} missing imageLayoutVersion", marker_path) + })?; + if version != OCI_LAYOUT_VERSION { + return Err(crate::new_error!( + "unsupported OCI image layout version {:?} at {:?} (expected {:?})", + version, + marker_path, + OCI_LAYOUT_VERSION + )); + } + Ok(true) +} + +/// Write a blob at `blobs/sha256/`, but skip the actual +/// write if a file with that name already exists. OCI blobs are +/// content-addressed, so an existing file with the same digest must +/// have the same bytes. Skipping reuses the blob across snapshots +/// in the same layout. +fn write_blob_dedup(blobs_dir: &Path, hex_digest: &str, bytes: &[u8]) -> crate::Result<()> { + let final_path = blobs_dir.join(hex_digest); + if final_path.exists() { + return Ok(()); + } + std::fs::write(&final_path, bytes) + .map_err(|e| crate::new_error!("failed to write blob {:?}: {}", final_path, e)) +} + +/// Compute sha256 of `bytes` and verify it equals `expected_hex`. +/// Used to validate manifest and config blobs (small, already in +/// memory). 
+fn verify_blob_bytes(label: &str, bytes: &[u8], expected_hex: &str) -> crate::Result<()> { + let actual = Digest256::from_bytes(bytes); + if actual.hex != expected_hex { + return Err(crate::new_error!( + "{} blob digest mismatch: descriptor declares sha256:{}, file hashes to sha256:{}", + label, + expected_hex, + actual.hex + )); + } + Ok(()) +} + +/// Stream-hash an already-open file and verify its sha256 equals +/// `expected_hex`. +/// +/// Takes the same `File` handle the caller will subsequently `mmap`, +/// not a path. Hashing one open and mapping another is open-then- +/// replace TOCTOU bait. Seeks to start before and after so the +/// caller's file position is unchanged. +fn verify_blob_file( + label: &str, + file: &mut std::fs::File, + expected_hex: &str, +) -> crate::Result<()> { + file.seek(SeekFrom::Start(0)) + .map_err(|e| crate::new_error!("failed to seek {} blob: {}", label, e))?; + let mut hasher = Sha256::new(); + let mut buf = [0u8; 64 * 1024]; + loop { + let n = file + .read(&mut buf) + .map_err(|e| crate::new_error!("failed to read {} blob: {}", label, e))?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + file.seek(SeekFrom::Start(0)) + .map_err(|e| crate::new_error!("failed to rewind {} blob: {}", label, e))?; + let actual = Digest256::from_hasher(hasher); + if actual.hex != expected_hex { + return Err(crate::new_error!( + "{} blob digest mismatch: descriptor declares sha256:{}, file hashes to sha256:{}", + label, + expected_hex, + actual.hex + )); + } + Ok(()) +} + +/// Read a file in full, refusing if the file is bigger than `max_size`. +/// +/// The cap is enforced on the actual byte stream via [`Read::take`], so files +/// whose `metadata().len()` is misleading cannot exceed the limit. 
+fn read_bounded(path: &Path, max_size: u64) -> crate::Result> { + let f = std::fs::File::open(path) + .map_err(|e| crate::new_error!("failed to open {:?}: {}", path, e))?; + let hint = f.metadata().map(|m| m.len().min(max_size)).unwrap_or(0); + let mut buf = Vec::with_capacity(hint as usize); + // Read one extra byte so we can distinguish "exactly at the limit" from + // "over the limit" instead of silently truncating an oversize file. + f.take(max_size.saturating_add(1)) + .read_to_end(&mut buf) + .map_err(|e| crate::new_error!("failed to read {:?}: {}", path, e))?; + if buf.len() as u64 > max_size { + return Err(crate::new_error!( + "file {:?} exceeds maximum allowed {} bytes", + path, + max_size + )); + } + Ok(buf) +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index 91fad0d4c..77647cada 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -14,6 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +mod file; +mod file_tests; + use std::collections::{BTreeMap, HashMap}; use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; From 8a064a1bcd2b66d164e9edd268a146a37596f413 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 16:01:13 -0700 Subject: [PATCH 3/5] Add tests for OCI snapshot persistence Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/sandbox/snapshot/file_tests.rs | 2562 +++++++++++++++++ 1 file changed, 2562 insertions(+) create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file_tests.rs diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs new file mode 100644 index 000000000..171ff3779 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -0,0 +1,2562 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Tests for the OCI Image Layout snapshot format (`super::file`). 
+ +#![cfg(test)] + +use std::sync::Arc; + +use hyperlight_testing::simple_guest_as_string; +use serde_json::Value; +use sha2::{Digest as _, Sha256}; + +use crate::func::Registerable; +use crate::sandbox::snapshot::Snapshot; +use crate::{GuestBinary, HostFunctions, MultiUseSandbox, UninitializedSandbox}; + +fn create_test_sandbox() -> MultiUseSandbox { + let path = simple_guest_as_string().unwrap(); + UninitializedSandbox::new(GuestBinary::FilePath(path), None) + .unwrap() + .evolve() + .unwrap() +} + +fn create_snapshot_from_binary() -> Snapshot { + let path = simple_guest_as_string().unwrap(); + Snapshot::from_env( + GuestBinary::FilePath(path), + crate::sandbox::SandboxConfiguration::default(), + ) + .unwrap() +} + +/// `Result::unwrap_err` requires `T: Debug`, but `Snapshot` is not +/// `Debug`. This wrapper is the test-side equivalent. +#[track_caller] +fn unwrap_err_snapshot(r: crate::Result) -> crate::HyperlightError { + match r { + Err(e) => e, + Ok(_) => panic!("expected Snapshot::from_oci to fail"), + } +} + +/// Locate the single config blob inside `oci_dir`. Returns its full +/// path. Used by tests that mutate the on-disk JSON. +fn find_config_blob(oci_dir: &std::path::Path) -> std::path::PathBuf { + let manifest_bytes = std::fs::read(oci_dir.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = oci_dir.join("blobs").join("sha256").join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let cfg_digest = manifest["config"]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + oci_dir.join("blobs").join("sha256").join(cfg_digest) +} + +// ============================================================================= +// In-memory `from_snapshot` round-trips (no file I/O). 
// =============================================================================

/// A sandbox built from a snapshot of a running sandbox can call
/// into the guest.
#[test]
fn from_snapshot_already_initialized_in_memory() {
    let mut sbox = create_test_sandbox();
    let snapshot = sbox.snapshot().unwrap();
    let mut sbox2 =
        MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None).unwrap();
    let result: i32 = sbox2.call("GetStatic", ()).unwrap();
    assert_eq!(result, 0);
}

/// A sandbox built from a snapshot created straight from the guest
/// binary can call into the guest.
#[test]
fn from_snapshot_in_memory_pre_init() {
    let snap = create_snapshot_from_binary();
    let mut sbox =
        MultiUseSandbox::from_snapshot(Arc::new(snap), HostFunctions::default(), None).unwrap();
    let result: i32 = sbox.call("GetStatic", ()).unwrap();
    assert_eq!(result, 0);
}

// =============================================================================
// Round-trip via OCI layout on disk.
// =============================================================================

#[test]
fn round_trip_save_load_call() {
    let mut sbox = create_test_sandbox();
    let snapshot = sbox.snapshot().unwrap();

    let dir = tempfile::tempdir().unwrap();
    let oci = dir.path().join("snap");
    snapshot.to_oci(&oci, "latest").unwrap();

    let loaded = Snapshot::from_oci(&oci, "latest").unwrap();
    let mut sbox2 =
        MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap();

    let result: String = sbox2.call("Echo", "hello\n".to_string()).unwrap();
    assert_eq!(result, "hello\n");
}

/// `snapshot_size` and `pt_size` survive a save/load cycle for both a
/// running-sandbox snapshot and a pre-init snapshot.
#[test]
fn snapshot_and_pt_size_round_trip() {
    // Running-sandbox snapshot.
    let mut sbox = create_test_sandbox();
    let snap = sbox.snapshot().unwrap();
    let original_snapshot_size = snap.layout().snapshot_size;
    let original_pt_size = snap.layout().pt_size;

    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("running");
    snap.to_oci(&path, "latest").unwrap();

    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
    assert_eq!(loaded.layout().snapshot_size, original_snapshot_size);
    assert_eq!(loaded.layout().pt_size, original_pt_size);

    // Pre-init snapshot.
    let preinit = create_snapshot_from_binary();
    let preinit_snapshot_size = preinit.layout().snapshot_size;
    let preinit_pt_size = preinit.layout().pt_size;

    let path = dir.path().join("preinit");
    preinit.to_oci(&path, "latest").unwrap();

    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
    assert_eq!(loaded.layout().snapshot_size, preinit_snapshot_size);
    assert_eq!(loaded.layout().pt_size, preinit_pt_size);
}

#[test]
fn pre_init_snapshot_save_load() {
    let snap = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("preinit");
    snap.to_oci(&path, "latest").unwrap();

    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
    let mut sbox =
        MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap();
    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
}

// =============================================================================
// Restore semantics (layout / host-function compatibility).
+// ============================================================================= + +#[test] +fn restore_from_loaded_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Arc::new(Snapshot::from_oci(&path, "latest").unwrap()); + let mut sbox2 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox2.call::("AddToStatic", 5i32).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 5); + + sbox2.restore(loaded).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn restore_across_independent_oci_loads_succeeds() { + // Compatibility between a sandbox and a snapshot is structural + // (memory layout plus host-function set). Two independent + // `from_oci` loads of the same image produce structurally + // identical snapshots, so a sandbox built from one accepts a + // restore from the other. 
+ let mut sbox = create_test_sandbox(); + let snap1 = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let p1 = dir.path().join("snap1"); + snap1.to_oci(&p1, "latest").unwrap(); + let p2 = dir.path().join("snap2"); + snap1.to_oci(&p2, "latest").unwrap(); + + let loaded1 = Arc::new(Snapshot::from_oci(&p1, "latest").unwrap()); + let loaded2 = Arc::new(Snapshot::from_oci(&p2, "latest").unwrap()); + + let mut sbox = MultiUseSandbox::from_snapshot(loaded2, HostFunctions::default(), None).unwrap(); + sbox.restore(loaded1).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn many_sandboxes_share_single_arc_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Arc::new(Snapshot::from_oci(&path, "latest").unwrap()); + let mut sandboxes = Vec::new(); + for _ in 0..4 { + sandboxes.push( + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(), + ); + } + for sbox in sandboxes.iter_mut() { + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + } +} + +#[test] +fn concurrent_sandboxes_from_same_oci() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let path = std::sync::Arc::new(path); + let mut handles = Vec::new(); + for _ in 0..4 { + let p = path.clone(); + handles.push(std::thread::spawn(move || { + let loaded = Snapshot::from_oci(p.as_ref(), "latest").unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + })); + } + for h in handles { + h.join().unwrap(); + } +} + +#[test] +fn cow_does_not_mutate_backing_file() { + let 
mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Hash every blob file to verify nothing changes after a CoW write + // through the loaded sandbox. + let blobs_dir = path.join("blobs").join("sha256"); + let snapshot_before: std::collections::BTreeMap<_, _> = std::fs::read_dir(&blobs_dir) + .unwrap() + .map(|e| { + let e = e.unwrap(); + let bytes = std::fs::read(e.path()).unwrap(); + (e.file_name(), bytes) + }) + .collect(); + + { + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .unwrap(); + sbox.call::("AddToStatic", 99).unwrap(); + } + + let snapshot_after: std::collections::BTreeMap<_, _> = std::fs::read_dir(&blobs_dir) + .unwrap() + .map(|e| { + let e = e.unwrap(); + let bytes = std::fs::read(e.path()).unwrap(); + (e.file_name(), bytes) + }) + .collect(); + assert_eq!( + snapshot_before, snapshot_after, + "CoW writes must not mutate any blob in the OCI layout" + ); +} + +// ============================================================================= +// Architecture / hypervisor / ABI gating. +// ============================================================================= + +/// Compute sha256 of `bytes` and return the lowercase hex digest. +fn sha256_hex(bytes: &[u8]) -> String { + let arr: [u8; 32] = Sha256::digest(bytes).into(); + hex::encode(arr) +} + +fn rewrite_config(oci_dir: &std::path::Path, mutate: F) { + // Mutate the config blob and rewrite the manifest + index so the + // OCI layout stays self-consistent: blob filenames, descriptor + // sizes, and descriptor sha256 digests all match the current + // bytes on disk. 
The point of these helpers is to exercise + // field-level validators (arch, abi_version, hypervisor, etc.), + // not the digest layer; tests that want to probe the digest + // layer write raw bytes directly. + let cfg_path = find_config_blob(oci_dir); + let mut cfg: Value = serde_json::from_slice(&std::fs::read(&cfg_path).unwrap()).unwrap(); + mutate(&mut cfg); + let new_cfg_bytes = serde_json::to_vec_pretty(&cfg).unwrap(); + let new_cfg_hex = sha256_hex(&new_cfg_bytes); + let blobs_dir = oci_dir.join("blobs").join("sha256"); + let new_cfg_path = blobs_dir.join(&new_cfg_hex); + std::fs::write(&new_cfg_path, &new_cfg_bytes).unwrap(); + if new_cfg_path != cfg_path { + std::fs::remove_file(&cfg_path).ok(); + } + + let mp = manifest_path(oci_dir); + let mut manifest: Value = serde_json::from_slice(&std::fs::read(&mp).unwrap()).unwrap(); + manifest["config"]["digest"] = Value::from(format!("sha256:{}", new_cfg_hex)); + manifest["config"]["size"] = Value::from(new_cfg_bytes.len() as u64); + let new_manifest_bytes = serde_json::to_vec_pretty(&manifest).unwrap(); + let new_manifest_hex = sha256_hex(&new_manifest_bytes); + let new_manifest_path = blobs_dir.join(&new_manifest_hex); + std::fs::write(&new_manifest_path, &new_manifest_bytes).unwrap(); + if new_manifest_path != mp { + std::fs::remove_file(&mp).ok(); + } + + let index_path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&index_path).unwrap()).unwrap(); + index["manifests"][0]["digest"] = Value::from(format!("sha256:{}", new_manifest_hex)); + index["manifests"][0]["size"] = Value::from(new_manifest_bytes.len() as u64); + std::fs::write(index_path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +/// Locate the manifest blob path inside `oci_dir`. 
+fn manifest_path(oci_dir: &std::path::Path) -> std::path::PathBuf { + let index: Value = + serde_json::from_slice(&std::fs::read(oci_dir.join("index.json")).unwrap()).unwrap(); + let digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + oci_dir.join("blobs").join("sha256").join(digest) +} + +/// Mutate the on-disk manifest JSON. Updates the index's manifest +/// descriptor `size` and `digest` to match the new manifest bytes +/// so the test exercises the field-level validator we care about, +/// not the digest layer. +fn rewrite_manifest(oci_dir: &std::path::Path, mutate: F) { + let mp = manifest_path(oci_dir); + let mut manifest: Value = serde_json::from_slice(&std::fs::read(&mp).unwrap()).unwrap(); + mutate(&mut manifest); + let new_bytes = serde_json::to_vec_pretty(&manifest).unwrap(); + let new_hex = sha256_hex(&new_bytes); + let blobs_dir = oci_dir.join("blobs").join("sha256"); + let new_path = blobs_dir.join(&new_hex); + std::fs::write(&new_path, &new_bytes).unwrap(); + if new_path != mp { + std::fs::remove_file(&mp).ok(); + } + + let index_path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&index_path).unwrap()).unwrap(); + index["manifests"][0]["digest"] = Value::from(format!("sha256:{}", new_hex)); + index["manifests"][0]["size"] = Value::from(new_bytes.len() as u64); + std::fs::write(index_path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +/// Mutate the on-disk index JSON in place. The index is the root of +/// the OCI layout and is not itself referenced by any digest, so +/// nothing further needs to be updated. 
+fn rewrite_index(oci_dir: &std::path::Path, mutate: F) { + let path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&path).unwrap()).unwrap(); + mutate(&mut index); + std::fs::write(path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +#[test] +fn arch_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_config(&path, |cfg| { + cfg["arch"] = Value::from("aarch64"); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("architecture") || msg.contains("arch"), + "expected architecture mismatch, got: {}", + msg + ); +} + +#[test] +fn abi_version_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_config(&path, |cfg| { + cfg["abi_version"] = Value::from(9999u32); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("ABI") || msg.contains("abi"), + "expected ABI version mismatch, got: {}", + msg + ); +} + +#[test] +fn hypervisor_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Pick a hypervisor that is not the current one. 
+ let current = cfg_current_hypervisor(); + let other = if current == "kvm" { "mshv" } else { "kvm" }; + + rewrite_config(&path, |cfg| { + cfg["hypervisor"] = Value::from(other); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("hypervisor"), + "expected hypervisor mismatch, got: {}", + msg + ); +} + +fn cfg_current_hypervisor() -> &'static str { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("probe"); + create_snapshot_from_binary() + .to_oci(&path, "latest") + .unwrap(); + let cfg_path = find_config_blob(&path); + let cfg: Value = serde_json::from_slice(&std::fs::read(&cfg_path).unwrap()).unwrap(); + match cfg["hypervisor"].as_str().unwrap() { + "kvm" => "kvm", + "mshv" => "mshv", + "whp" => "whp", + other => panic!("unknown hypervisor tag {other}"), + } +} + +// ============================================================================= +// Entrypoint vs sregs invariants enforced by serde shape. +// ============================================================================= + +#[test] +fn call_snapshot_without_sregs_rejected() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Strip sregs from the entrypoint variant. serde must reject the + // missing field at parse time. 
+ rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + assert_eq!(entry["kind"].as_str().unwrap(), "call"); + entry.remove("sregs"); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("sregs") || msg.contains("missing field") || msg.contains("config"), + "expected serde error about missing sregs, got: {}", + msg + ); +} + +#[test] +fn initialise_snapshot_with_sregs_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Add a bogus sregs field to the Initialise variant. serde must + // reject the unknown field (variant has deny_unknown_fields). + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + assert_eq!(entry["kind"].as_str().unwrap(), "initialise"); + entry.insert("sregs".to_string(), Value::from("{}")); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("sregs") || msg.contains("unknown field") || msg.contains("config"), + "expected serde error about unknown field sregs, got: {}", + msg + ); +} + +// ============================================================================= +// Host functions validation. +// +// `validate_host_functions` enforces a superset relation: every host +// function registered when the snapshot was taken must be present in +// the loaded sandbox's `HostFunctions` with a matching signature. +// Extras in the loaded set are allowed. +// ============================================================================= + +/// Build a `MultiUseSandbox` with the default host functions plus a +/// custom `Add(i32, i32) -> i32`. Used to seed the snapshot side of +/// the host-function validation tests so the snapshot has a +/// non-default required function. 
+fn create_sandbox_with_custom_host_funcs() -> MultiUseSandbox { + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + u.evolve().unwrap() +} + +/// `HostFunctions::default()` plus a matching `Add(i32, i32) -> i32`. +fn host_funcs_with_matching_add() -> HostFunctions { + let mut hf = HostFunctions::default(); + hf.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + hf +} + +#[test] +fn from_snapshot_accepts_matching_host_functions() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), host_funcs_with_matching_add(), None) + .unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn from_snapshot_rejects_missing_host_function() { + // Snapshot was taken with `Add` registered. Loading with the + // default `HostFunctions` (no `Add`) must be rejected. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let err = MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .expect_err("from_snapshot must reject a HostFunctions set missing `Add`"); + let msg = format!("{}", err); + assert!( + msg.contains("missing") && msg.contains("Add"), + "expected missing-host-function error mentioning Add, got: {}", + msg + ); +} + +#[test] +fn from_snapshot_rejects_signature_mismatch() { + // Snapshot has `Add(i32, i32) -> i32`. 
Load registers an `Add` + // with a different signature. validate_host_functions must + // refuse the mismatch. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = HostFunctions::default(); + hf.register_host_function("Add", |a: String, b: String| Ok(format!("{a}{b}"))) + .unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let err = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None) + .expect_err("from_snapshot must reject a signature mismatch on Add"); + let msg = format!("{}", err); + assert!( + msg.contains("signature mismatches") && msg.contains("Add"), + "expected signature-mismatch error mentioning Add, got: {}", + msg + ); +} + +#[test] +fn from_snapshot_accepts_extra_host_functions() { + // Snapshot has `Add`. Load registers `Add` (matching) plus an + // unrelated `Mul`. Extras are allowed. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = host_funcs_with_matching_add(); + hf.register_host_function("Mul", |a: i32, b: i32| Ok(a * b)) + .unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn from_snapshot_accepts_zero_arg_host_function() { + // A zero-arg host function must round-trip through OCI. 
+ let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Zero", || Ok(7i64)).unwrap(); + let mut sbox = u.evolve().unwrap(); + + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = HostFunctions::default(); + hf.register_host_function("Zero", || Ok(7i64)).unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let _sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None) + .expect("zero-arg host function must round-trip through OCI"); +} + +#[test] +fn from_snapshot_has_default_host_print() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + let _ = sbox2.call::("PrintTwoArgs", ("hi".to_string(), 42i32)); +} + +// ============================================================================= +// OCI-shape invariants. 
+// ============================================================================= + +#[test] +fn missing_oci_layout_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::remove_file(path.join("oci-layout")).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("oci-layout"), + "expected missing oci-layout error, got: {}", + msg + ); +} + +#[test] +fn wrong_image_layout_version_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::write( + path.join("oci-layout"), + r#"{"imageLayoutVersion":"99.0.0"}"#, + ) + .unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("image layout version") || msg.contains("imageLayoutVersion"), + "expected layout version error, got: {}", + msg + ); +} + +#[test] +fn missing_index_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::remove_file(path.join("index.json")).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("index.json"), + "expected missing index.json error, got: {}", + msg + ); +} + +#[test] +fn snapshot_blob_size_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Truncate the snapshot blob by one byte. 
+ let blobs_dir = path.join("blobs").join("sha256"); + let manifest_bytes = std::fs::read(path.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = blobs_dir.join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let snap_path = blobs_dir.join(snap_digest); + let bytes = std::fs::read(&snap_path).unwrap(); + std::fs::write(&snap_path, &bytes[..bytes.len() - 1]).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("size") || msg.contains("mismatch"), + "expected size mismatch error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_zero_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + cfg["layout"]["snapshot_size"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("snapshot_size"), + "expected snapshot_size error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_unaligned_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + let s = cfg["layout"]["snapshot_size"].as_u64().unwrap(); + cfg["layout"]["snapshot_size"] = Value::from(s + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); 
+ assert!( + msg.contains("PAGE_SIZE") || msg.contains("multiple"), + "expected page alignment error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_must_match_memory_size() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + let page = hyperlight_common::vmem::PAGE_SIZE as u64; + rewrite_config(&path, |cfg| { + let m = cfg["memory_size"].as_u64().unwrap(); + cfg["layout"]["snapshot_size"] = Value::from(m + page); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("does not equal memory_size"), + "expected snapshot_size + pt_size != memory_size error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_pt_size_unaligned_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + if let Some(p) = cfg["layout"]["pt_size"].as_u64() { + cfg["layout"]["pt_size"] = Value::from(p + 1); + } else { + cfg["layout"]["pt_size"] = Value::from(1u64); + } + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("pt_size") || msg.contains("PAGE_SIZE") || msg.contains("multiple"), + "expected pt_size validation error, got: {}", + msg + ); +} + +#[test] +fn missing_snapshot_blob_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let blobs_dir = path.join("blobs").join("sha256"); + let manifest_bytes = std::fs::read(path.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() 
+ .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = blobs_dir.join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + std::fs::remove_file(blobs_dir.join(snap_digest)).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("snapshot blob") || msg.contains("No such") || msg.contains("not found"), + "expected missing-blob error, got: {}", + msg + ); +} + +// ============================================================================= +// Path semantics. +// ============================================================================= + +#[test] +fn from_oci_nonexistent_path_returns_error() { + let err = unwrap_err_snapshot(Snapshot::from_oci("/nonexistent/path/to/oci", "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("stat") || msg.contains("No such") || msg.contains("not found"), + "expected missing-path error, got: {}", + msg + ); +} + +#[test] +fn from_oci_file_not_directory_rejected() { + let dir = tempfile::tempdir().unwrap(); + let file_path = dir.path().join("not-a-dir"); + std::fs::write(&file_path, b"hello").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&file_path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("not a directory"), + "expected not-a-directory error, got: {}", + msg + ); +} + +#[test] +fn to_oci_refuses_existing_non_oci_directory() { + // The contract is: `to_oci` errors out if a non-OCI-layout + // directory exists at `path`, without modifying it. The caller + // is responsible for cleaning up the previous directory (or + // choosing a different path). This eliminates the foot-gun of + // `remove_dir_all` wiping arbitrary user data on a typo. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + std::fs::create_dir(&path).unwrap(); + std::fs::write(path.join("stale.txt"), b"do-not-delete").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{}", err); + assert!( + msg.contains("not an OCI image layout"), + "expected non-OCI-layout refusal error, got: {}", + msg + ); + + // Unrelated content must survive untouched. + assert_eq!( + std::fs::read(path.join("stale.txt")).unwrap(), + b"do-not-delete", + "to_oci must not delete unrelated files in the target directory" + ); +} + +#[test] +fn to_oci_refuses_existing_file() { + // Same contract for a regular file at `path`: refuse without + // touching it. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + std::fs::write(&path, b"i am a file, not a snapshot").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{}", err); + assert!( + msg.contains("not a directory"), + "expected refusal-on-non-directory error, got: {}", + msg + ); + assert_eq!( + std::fs::read(&path).unwrap(), + b"i am a file, not a snapshot", + "to_oci must not touch a pre-existing file at the target path" + ); +} + +#[test] +fn to_oci_refuses_duplicate_tag() { + // Saving the same tag twice into one layout is rejected: the + // caller has to delete the existing tag first, or pick a + // different name. This avoids accidental in-place replacement. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{}", err); + assert!( + msg.contains("refusing to overwrite tag") && msg.contains("\"latest\""), + "expected duplicate-tag refusal error, got: {}", + msg + ); + // The first tag must still be loadable. + let _ = Snapshot::from_oci(&path, "latest").unwrap(); +} + +/// Asserts the integrity contract: a snapshot blob whose bytes have +/// been replaced (without changing length, so descriptor sizes still +/// match) must be rejected on load via digest mismatch. +#[test] +fn from_oci_rejects_snapshot_blob_byte_mutation() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Locate the snapshot blob via the manifest, then flip one byte + // somewhere in the middle. Length is preserved so all descriptor + // size checks still pass. Only a digest re-hash can detect this. 
+ let blobs_dir = path.join("blobs").join("sha256"); + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + let manifest: Value = + serde_json::from_slice(&std::fs::read(blobs_dir.join(&manifest_digest)).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + let snap_path = blobs_dir.join(&snap_digest); + let mut bytes = std::fs::read(&snap_path).unwrap(); + let mid = bytes.len() / 2; + bytes[mid] ^= 0xFF; + std::fs::write(&snap_path, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("hash") || msg.contains("sha256"), + "expected digest-mismatch error, got: {}", + msg + ); +} + +/// Same idea as `from_oci_rejects_snapshot_blob_byte_mutation`, but +/// targeting the config blob. A config-blob mutation that preserves +/// the descriptor size and the structural fields the loader +/// validates today (e.g. flipping a byte inside the host-function +/// flatbuffer payload) must be caught by digest verification. +#[test] +fn from_oci_rejects_config_blob_byte_mutation() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let cfg_path = find_config_blob(&path); + let mut bytes = std::fs::read(&cfg_path).unwrap(); + // Replace the first ASCII brace `{` with a different byte that + // keeps the file the same length but yields a different sha256. + // This will also break JSON parsing, but the point is to assert + // the digest layer rejects it before the parser ever runs. 
+ bytes[0] = b' '; + std::fs::write(&cfg_path, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("hash") || msg.contains("sha256"), + "expected digest-mismatch error, got: {}", + msg + ); +} + +#[test] +fn from_oci_observes_per_path_contents() { + // `to_oci` no longer permits overwriting, so verifying that two + // independent saves have independent contents is done by writing + // each snapshot to its own path and asserting the loaded + // contents differ. + let mut sbox = create_test_sandbox(); + sbox.call::("AddToStatic", 11i32).unwrap(); + let snap_x = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path_x = dir.path().join("snap_x"); + snap_x.to_oci(&path_x, "latest").unwrap(); + + let loaded_x = Snapshot::from_oci(&path_x, "latest").unwrap(); + let mut sbox_x = + MultiUseSandbox::from_snapshot(Arc::new(loaded_x), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_x.call::("GetStatic", ()).unwrap(), 11); + + sbox.call::("AddToStatic", 44i32).unwrap(); + let snap_y = sbox.snapshot().unwrap(); + let path_y = dir.path().join("snap_y"); + snap_y.to_oci(&path_y, "latest").unwrap(); + + let loaded_y = Snapshot::from_oci(&path_y, "latest").unwrap(); + let mut sbox_y = + MultiUseSandbox::from_snapshot(Arc::new(loaded_y), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_y.call::("GetStatic", ()).unwrap(), 55); +} + +// ============================================================================= +// Exhaustive input-validation tests for `from_oci`. +// +// Every load-side error path in `super::file::from_oci` should be +// exercised here. 
+// ============================================================================= + +fn save_for_mutation() -> (tempfile::TempDir, std::path::PathBuf) { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + (dir, path) +} + +fn assert_err_contains(err: crate::HyperlightError, needle: &str) { + let msg = format!("{}", err); + assert!( + msg.contains(needle), + "expected error to contain {:?}, got: {}", + needle, + msg + ); +} + +#[test] +fn malformed_oci_layout_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("oci-layout"), b"not-valid-json{").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "oci-layout"); +} + +#[test] +fn oci_layout_missing_version_field_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("oci-layout"), r#"{"unrelated":"field"}"#).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "imageLayoutVersion"); +} + +#[test] +fn malformed_index_json_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("index.json"), b"{not json").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "index.json"); +} + +#[test] +fn empty_index_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + idx["manifests"] = Value::Array(Vec::new()); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "no manifest tagged"); +} + +#[test] +fn from_oci_rejects_duplicate_tag_in_index() { + // A valid OCI layout has unique tags. Two manifests sharing the + // same `org.opencontainers.image.ref.name` annotation is + // malformed and from_oci must refuse rather than silently + // pick one. 
+ let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + let first = idx["manifests"][0].clone(); + idx["manifests"].as_array_mut().unwrap().push(first); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "multiple manifests tagged"); +} + +#[test] +fn missing_manifest_blob_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::remove_file(manifest_path(&path)).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("open") || msg.contains("No such") || msg.contains("not found"), + "expected missing-manifest error, got: {}", + msg + ); +} + +#[test] +fn bad_digest_format_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + // Strip the algorithm prefix entirely. `oci-spec` validates + // descriptor digests on parse, so the index parser rejects + // this before our own digest helper sees it. + idx["manifests"][0]["digest"] = Value::from("deadbeef"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("index.json"), + "expected digest or parse error, got: {}", + msg + ); +} + +#[test] +fn malformed_manifest_json_rejected() { + // Probes the manifest JSON parser. Under `from_oci`, the + // digest-verification step would fire first and short-circuit + // this; that path is covered by + // `from_oci_rejects_manifest_blob_byte_mutation`. Use + // `from_oci_unchecked` here to reach the parser. + let (_dir, path) = save_for_mutation(); + let mp = manifest_path(&path); + std::fs::write(&mp, b"{not json").unwrap(); + // Update index size to match so we hit the JSON parser, not the + // size check. 
+ let new_len = std::fs::metadata(&mp).unwrap().len(); + rewrite_index(&path, |idx| { + idx["manifests"][0]["size"] = Value::from(new_len); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "manifest"); +} + +#[test] +fn wrong_manifest_schema_version_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["schemaVersion"] = Value::from(99u32); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "schemaVersion"); +} + +#[test] +fn unknown_config_media_type_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["config"]["mediaType"] = Value::from("application/vnd.example.unknown.v1+json"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "config media type"); +} + +#[test] +fn empty_layers_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"] = Value::Array(Vec::new()); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "layer"); +} + +#[test] +fn extra_layers_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + let first = m["layers"][0].clone(); + m["layers"].as_array_mut().unwrap().push(first); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "layer"); +} + +#[test] +fn unknown_snapshot_layer_media_type_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"][0]["mediaType"] = Value::from("application/vnd.example.unknown.v1"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "snapshot layer media type"); +} + +/// Manifest- and index-level annotations injected by third-party +/// tools (cosign, ORAS, build pipelines, etc.) must NOT break load. 
+/// `HlConfig` is intentionally strict (`deny_unknown_fields`) but +/// the OCI envelope around it is parsed via `oci-spec`'s lenient +/// types. +#[test] +fn manifest_and_index_annotations_tolerated() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_manifest(&path, |m| { + let mut anns = serde_json::Map::new(); + anns.insert( + "org.opencontainers.image.created".to_string(), + Value::from("2024-01-01T00:00:00Z"), + ); + anns.insert( + "dev.sigstore.cosign/signature".to_string(), + Value::from("MEUCIQDsignature"), + ); + m["annotations"] = Value::Object(anns); + }); + rewrite_index(&path, |idx| { + let mut anns = serde_json::Map::new(); + anns.insert( + "org.opencontainers.image.ref.name".to_string(), + Value::from("v1.2.3"), + ); + idx["annotations"] = Value::Object(anns); + }); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn config_blob_size_descriptor_mismatch_rejected() { + let (_dir, path) = save_for_mutation(); + // Bump the config descriptor's claimed size by one without + // touching the actual blob. + rewrite_manifest(&path, |m| { + let sz = m["config"]["size"].as_u64().unwrap(); + m["config"]["size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "config blob size mismatch"); +} + +#[test] +fn malformed_config_json_rejected() { + // Probes the config JSON parser. Under `from_oci` the + // digest-verification step would fire first; that path is + // covered by `from_oci_rejects_config_blob_byte_mutation`. + // Use `from_oci_unchecked` here to reach the parser. 
+ let (_dir, path) = save_for_mutation(); + let cfg_path = find_config_blob(&path); + std::fs::write(&cfg_path, b"{not json").unwrap(); + // Update both the manifest's config descriptor size and the + // index's manifest descriptor size to match so we reach the + // JSON parser, not the size check. + let new_cfg_len = std::fs::metadata(&cfg_path).unwrap().len(); + rewrite_manifest(&path, |m| { + m["config"]["size"] = Value::from(new_cfg_len); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "config JSON"); +} + +#[test] +fn memory_size_zero_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + cfg["memory_size"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "memory_size"); +} + +#[test] +fn memory_size_unaligned_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let sz = cfg["memory_size"].as_u64().unwrap(); + cfg["memory_size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + // Either the page-alignment check or the file-size check trips. + // Both are valid signals that the value was rejected. + assert!( + msg.contains("memory_size") || msg.contains("PAGE_SIZE") || msg.contains("size"), + "expected memory_size rejection, got: {}", + msg + ); +} + +#[test] +fn bad_init_data_permissions_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + // 1u32 << 31 is well outside the defined READ|WRITE|EXECUTE bits. + cfg["layout"]["init_data_permissions"] = Value::from(0x8000_0000u32); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "init_data_permissions"); +} + +#[test] +fn entrypoint_addr_outside_snapshot_region_rejected() { + // A crafted config can claim any u64 as the entry point. 
The + // loader must refuse addresses that don't lie within + // [BASE_ADDRESS, BASE_ADDRESS + snapshot_size) so a malicious + // image can't direct execution into unmapped GPA space or into + // the writable scratch region. + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + // 0xDEAD_BEEF_0000 is far above any plausible snapshot + // region (snapshot_size is bounded by MAX_MEMORY_SIZE, + // ~16 GiB) and outside guest mapped memory. + entry["addr"] = Value::from(0xDEAD_BEEF_0000u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "entrypoint addr"); +} + +#[test] +fn entrypoint_addr_below_base_address_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + // 0 is below BASE_ADDRESS (0x1000); rejected as "outside the + // snapshot region". + entry["addr"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "entrypoint addr"); +} + +// ============================================================================= +// `from_oci_unchecked`: skips blob digest verification but still runs +// every other validator (OCI structure, descriptor sizes, schema +// versions, arch / hypervisor / ABI tags, layout bounds, entrypoint +// bounds). 
+// ============================================================================= + +#[test] +fn from_oci_unchecked_round_trips() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci_unchecked(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + let result: String = sbox2.call("Echo", "hi\n".to_string()).unwrap(); + assert_eq!(result, "hi\n"); +} + +#[test] +fn from_oci_unchecked_still_validates_config_fields() { + // Field-level validators (arch, abi, hypervisor, layout bounds, + // entrypoint bounds) must still fire under `from_oci_unchecked`. + // Use `rewrite_config` so the layout stays self-consistent + // (otherwise the checked path would also catch this via the + // descriptor-size check before the field validator runs). + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + cfg["arch"] = Value::from("aarch64"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("architecture") || msg.contains("arch"), + "expected architecture mismatch under from_oci_unchecked, got: {}", + msg + ); +} + +#[test] +fn from_oci_rejects_manifest_blob_byte_mutation() { + // Mutate a manifest body byte (without updating the index's + // descriptor digest) and confirm the loader catches it via + // digest verification before any of the field-level manifest + // validators (schema version, media type, etc.) run. + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let mp = manifest_path(&path); + let mut bytes = std::fs::read(&mp).unwrap(); + // Flip the first byte. 
Length is preserved so the descriptor + // size check still passes; only digest verification can detect + // this. The byte will also break JSON parsing, but the digest + // check fires first. + bytes[0] ^= 0x20; + std::fs::write(&mp, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "digest mismatch"); +} + +// ============================================================================= +// Multi-tag layouts. +// +// One OCI Image Layout directory can hold any number of snapshots, +// each addressed by tag. Blobs (manifest, config, snapshot memory) +// are deduplicated across tags by content digest. +// ============================================================================= + +#[test] +fn append_distinct_tags_to_one_layout() { + // Two distinguishable snapshots saved into the same layout + // under different tags. Each is loadable by its own tag and has + // the right contents. + let mut sbox = create_test_sandbox(); + + sbox.call::<i32>("AddToStatic", 7i32).unwrap(); + let snap_a = sbox.snapshot().unwrap(); + + sbox.call::<i32>("AddToStatic", 30i32).unwrap(); + let snap_b = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap_a.to_oci(&path, "snap-a").unwrap(); + snap_b.to_oci(&path, "snap-b").unwrap(); + + let loaded_a = Snapshot::from_oci(&path, "snap-a").unwrap(); + let loaded_b = Snapshot::from_oci(&path, "snap-b").unwrap(); + + let mut sbox_a = + MultiUseSandbox::from_snapshot(Arc::new(loaded_a), HostFunctions::default(), None).unwrap(); + let mut sbox_b = + MultiUseSandbox::from_snapshot(Arc::new(loaded_b), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_a.call::<i32>("GetStatic", ()).unwrap(), 7); + assert_eq!(sbox_b.call::<i32>("GetStatic", ()).unwrap(), 37); +} + +#[test] +fn appending_dedupes_identical_blobs() { + // Two saves of the SAME snapshot under different tags must not + // duplicate any blob in `blobs/sha256/`.
Every blob in the + // layout must be the union of the per-tag manifest, config, and + // snapshot blob digests, but with each unique digest counted + // once. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "v1").unwrap(); + snap.to_oci(&path, "v2").unwrap(); + + let blobs_dir = path.join("blobs").join("sha256"); + let blob_count = std::fs::read_dir(&blobs_dir).unwrap().count(); + // Identical snapshots share the snapshot blob, the config blob, + // and the manifest blob; only the index distinguishes them. + // Therefore exactly 3 blobs. + assert_eq!( + blob_count, 3, + "expected 3 blobs (snapshot + config + manifest, all dedup'd) in {:?}", + blobs_dir + ); + + // Both tags load and produce sandboxes with the same state. + let _ = Snapshot::from_oci(&path, "v1").unwrap(); + let _ = Snapshot::from_oci(&path, "v2").unwrap(); +} + +#[test] +fn appending_distinct_snapshots_dedups_shared_blobs() { + // Two DIFFERENT snapshots saved under different tags. Each has + // its own snapshot memory blob and its own manifest blob, but + // the config blob (which captures arch / hypervisor / layout / + // host functions / entrypoint, but not guest memory) is + // typically identical across snapshots from the same sandbox, + // so we expect 5 blobs: 2 memory + 1 shared config + 2 + // manifests. 
+ let mut sbox = create_test_sandbox(); + sbox.call::<i32>("AddToStatic", 1i32).unwrap(); + let snap_a = sbox.snapshot().unwrap(); + sbox.call::<i32>("AddToStatic", 2i32).unwrap(); + let snap_b = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap_a.to_oci(&path, "a").unwrap(); + snap_b.to_oci(&path, "b").unwrap(); + + let blobs_dir = path.join("blobs").join("sha256"); + let blob_count = std::fs::read_dir(&blobs_dir).unwrap().count(); + assert_eq!( + blob_count, 5, + "expected 5 distinct blobs (2 memory + 1 shared config + 2 manifests) in {:?}", + blobs_dir + ); +} + +#[test] +fn from_oci_unknown_tag_lists_available_tags() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "alpha").unwrap(); + snap.to_oci(&path, "beta").unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "missing")); + let msg = format!("{}", err); + assert!( + msg.contains("no manifest tagged") && msg.contains("\"missing\""), + "expected unknown-tag error mentioning the requested tag, got: {}", + msg + ); + assert!( + msg.contains("alpha") && msg.contains("beta"), + "expected available-tags listing, got: {}", + msg + ); +} + +#[test] +fn manifest_descriptor_carries_ref_name_annotation() { + // The OCI standard tag annotation must be set on the manifest + // descriptor in `index.json` so external tools (`oras`, + // `crane manifest`, `skopeo inspect`) see the tag.
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "production-v3").unwrap(); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let manifest = &index["manifests"][0]; + assert_eq!( + manifest["annotations"]["org.opencontainers.image.ref.name"] + .as_str() + .unwrap(), + "production-v3" + ); +} + +// ============================================================================= +// Tag validation. +// ============================================================================= + +#[test] +fn empty_tag_rejected_on_save() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let err = snap.to_oci(dir.path().join("snap"), "").unwrap_err(); + assert!(format!("{err}").contains("tag")); +} + +#[test] +fn empty_tag_rejected_on_load() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "")); + assert!(format!("{err}").contains("tag")); +} + +#[test] +fn tag_with_illegal_leading_char_rejected() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let err = snap + .to_oci(dir.path().join("snap"), ".dotleader") + .unwrap_err(); + assert!(format!("{err}").contains("tag")); + + let err = snap + .to_oci(dir.path().join("snap"), "-dashleader") + .unwrap_err(); + assert!(format!("{err}").contains("tag")); +} + +#[test] +fn tag_with_illegal_chars_rejected() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let err = snap + .to_oci(dir.path().join("snap"), "with/slash") + .unwrap_err(); + assert!(format!("{err}").contains("tag")); + + let err = snap + .to_oci(dir.path().join("snap"), "with space") + .unwrap_err(); + 
assert!(format!("{err}").contains("tag")); +} + +#[test] +fn long_tag_within_limit_accepted() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let tag: String = "a".repeat(128); + snap.to_oci(dir.path().join("snap"), &tag).unwrap(); + let _ = Snapshot::from_oci(dir.path().join("snap"), &tag).unwrap(); +} + +#[test] +fn over_long_tag_rejected() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let tag: String = "a".repeat(129); + let err = snap.to_oci(dir.path().join("snap"), &tag).unwrap_err(); + assert!(format!("{err}").contains("tag")); +} + +// ============================================================================= +// Append-side error paths for `to_oci`. +// ============================================================================= + +#[test] +fn to_oci_refuses_layout_with_malformed_oci_layout_marker() { + // Directory exists with a corrupt oci-layout marker. We must + // refuse to touch it, even though it superficially looks like + // an OCI layout. Same rule applies for missing version field. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + std::fs::create_dir(&path).unwrap(); + std::fs::write(path.join("oci-layout"), b"{garbage").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + assert!( + format!("{err}").contains("oci-layout"), + "expected oci-layout marker error, got: {err}" + ); + + // The corrupt marker must not have been replaced. 
+ assert_eq!(std::fs::read(path.join("oci-layout")).unwrap(), b"{garbage"); +} + +#[test] +fn to_oci_refuses_layout_with_oci_layout_missing_version() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + std::fs::create_dir(&path).unwrap(); + std::fs::write(path.join("oci-layout"), b"{}").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + assert!( + format!("{err}").contains("imageLayoutVersion"), + "expected missing-version error, got: {err}" + ); +} + +#[test] +fn to_oci_refuses_layout_with_unsupported_version() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + std::fs::create_dir(&path).unwrap(); + std::fs::write( + path.join("oci-layout"), + br#"{"imageLayoutVersion":"99.0.0"}"#, + ) + .unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + assert!( + format!("{err}").contains("image layout version"), + "expected unsupported-version error, got: {err}" + ); +} + +#[test] +fn to_oci_refuses_layout_with_missing_index_on_append() { + // Layout with a valid `oci-layout` marker but no `index.json` + // is malformed. `to_oci` must refuse to append and not write a + // new (single-tag) index that would mask the corruption. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + std::fs::create_dir(&path).unwrap(); + std::fs::write( + path.join("oci-layout"), + br#"{"imageLayoutVersion":"1.0.0"}"#, + ) + .unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + assert!( + format!("{err}").contains("index.json"), + "expected index-related error, got: {err}" + ); +} + +#[test] +fn to_oci_refuses_layout_with_malformed_index_on_append() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + std::fs::create_dir(&path).unwrap(); + std::fs::write( + path.join("oci-layout"), + br#"{"imageLayoutVersion":"1.0.0"}"#, + ) + .unwrap(); + std::fs::write(path.join("index.json"), b"{not json").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + assert!( + format!("{err}").contains("index.json"), + "expected index-parse error, got: {err}" + ); + // The malformed index must not have been overwritten. + assert_eq!( + std::fs::read(path.join("index.json")).unwrap(), + b"{not json" + ); +} + +#[test] +fn to_oci_does_not_rewrite_oci_layout_on_append() { + // The oci-layout marker is created once, on the fresh-write + // path. On append, the writer must leave it alone (no spurious + // mtime bump, no risk of clobbering a marker the user is + // happy with). + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "first").unwrap(); + + let marker_path = path.join("oci-layout"); + let mtime_before = std::fs::metadata(&marker_path).unwrap().modified().unwrap(); + // Bound the test so the second save is reliably distinguishable + // from the first if it did rewrite. Filesystems generally have + // millisecond-or-better mtime resolution. 
+ std::thread::sleep(std::time::Duration::from_millis(50)); + + snap.to_oci(&path, "second").unwrap(); + let mtime_after = std::fs::metadata(&marker_path).unwrap().modified().unwrap(); + assert_eq!( + mtime_before, mtime_after, + "oci-layout marker must not be rewritten on append" + ); +} + +// ============================================================================= +// Save-shape invariants. Verify the on-disk JSON we hand to standard +// OCI tools matches what the spec prescribes. +// ============================================================================= + +#[test] +fn manifest_descriptor_uses_image_manifest_media_type() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + assert_eq!( + index["manifests"][0]["mediaType"].as_str().unwrap(), + "application/vnd.oci.image.manifest.v1+json" + ); +} + +#[test] +fn manifest_uses_correct_config_and_layer_media_types() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let manifest: Value = + serde_json::from_slice(&std::fs::read(manifest_path(&path)).unwrap()).unwrap(); + assert_eq!( + manifest["config"]["mediaType"].as_str().unwrap(), + "application/vnd.hyperlight.sandbox.config.v1+json" + ); + assert_eq!(manifest["layers"].as_array().unwrap().len(), 1); + assert_eq!( + manifest["layers"][0]["mediaType"].as_str().unwrap(), + "application/vnd.hyperlight.snapshot.v1" + ); +} + +#[test] +fn save_writes_oci_layout_marker() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let marker: Value = + 
serde_json::from_slice(&std::fs::read(path.join("oci-layout")).unwrap()).unwrap(); + assert_eq!(marker["imageLayoutVersion"].as_str().unwrap(), "1.0.0"); +} + +// ============================================================================= +// Tag selection edge cases. +// ============================================================================= + +#[test] +fn tag_lookup_is_case_sensitive() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "MyTag").unwrap(); + + // Different case must NOT match. + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "mytag")); + assert_err_contains(err, "no manifest tagged"); + + // Exact case loads. + let _ = Snapshot::from_oci(&path, "MyTag").unwrap(); +} + +#[test] +fn ref_name_annotation_key_is_case_sensitive() { + // If the index uses a misspelled annotation key (e.g. + // `org.OpenContainers.image.ref.name`), the manifest is treated + // as untagged and from_oci must not load it under any name. 
+ let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + let anns = idx["manifests"][0]["annotations"].as_object_mut().unwrap(); + let value = anns.remove("org.opencontainers.image.ref.name").unwrap(); + anns.insert("org.OpenContainers.image.ref.name".to_string(), value); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "no manifest tagged"); +} + +#[test] +fn tag_with_all_valid_special_chars_accepted() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + let tag = "v1.2.3-rc.1_build"; + snap.to_oci(&path, tag).unwrap(); + let _ = Snapshot::from_oci(&path, tag).unwrap(); +} + +#[test] +fn three_tags_in_one_layout_each_loads() { + let mut sbox = create_test_sandbox(); + sbox.call::<i32>("AddToStatic", 1i32).unwrap(); + let s_a = sbox.snapshot().unwrap(); + sbox.call::<i32>("AddToStatic", 2i32).unwrap(); + let s_b = sbox.snapshot().unwrap(); + sbox.call::<i32>("AddToStatic", 4i32).unwrap(); + let s_c = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + s_a.to_oci(&path, "a").unwrap(); + s_b.to_oci(&path, "b").unwrap(); + s_c.to_oci(&path, "c").unwrap(); + + for (tag, expected) in [("a", 1), ("b", 3), ("c", 7)] { + let loaded = Snapshot::from_oci(&path, tag).unwrap(); + let mut s = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .unwrap(); + assert_eq!( + s.call::<i32>("GetStatic", ()).unwrap(), + expected, + "tag {tag}" + ); + } +} + +#[test] +fn other_descriptor_annotations_do_not_interfere() { + // A manifest descriptor with the standard ref.name annotation + // PLUS unrelated annotations (cosign signatures, build + // pipelines, etc.) must still resolve by tag.
+ let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + let anns = idx["manifests"][0]["annotations"].as_object_mut().unwrap(); + anns.insert( + "dev.sigstore.cosign/signature".to_string(), + Value::from("MEUCIQDfake"), + ); + anns.insert("io.example.build.id".to_string(), Value::from("12345")); + }); + let _ = Snapshot::from_oci(&path, "latest").unwrap(); +} + +// ============================================================================= +// Bad sha256 digest format on the inner descriptors (config and snapshot +// layer). The index-side equivalent is `bad_digest_format_rejected`. +// ============================================================================= + +#[test] +fn bad_config_descriptor_digest_format_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["config"]["digest"] = Value::from("md5:deadbeef"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("digest"), + "expected digest-format error, got: {msg}" + ); +} + +#[test] +fn bad_snapshot_layer_descriptor_digest_format_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"][0]["digest"] = Value::from("sha256:tooshort"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("digest"), + "expected digest-format error, got: {msg}" + ); +} + +// ============================================================================= +// Missing inner blobs. 
+// ============================================================================= + +#[test] +fn missing_config_blob_rejected() { + let (_dir, path) = save_for_mutation(); + let cfg_path = find_config_blob(&path); + std::fs::remove_file(&cfg_path).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("open") || msg.contains("No such") || msg.contains("not found"), + "expected missing-config-blob error, got: {msg}" + ); +} + +// ============================================================================= +// Size-bound enforcement. +// ============================================================================= + +#[test] +fn manifest_blob_too_large_rejected() { + // The manifest reader bounds to 1 MiB. Replace the manifest + // with junk longer than that and confirm the bound trips + // before any parsing. + let (_dir, path) = save_for_mutation(); + let mp = manifest_path(&path); + let huge = vec![b'a'; (1024 * 1024 + 16) as usize]; + std::fs::write(&mp, &huge).unwrap(); + // Update descriptor size to match so we hit the bound check, + // not the size mismatch check. 
+ rewrite_index(&path, |idx| { + idx["manifests"][0]["size"] = Value::from(huge.len() as u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "exceeds maximum allowed"); +} + +#[test] +fn config_blob_too_large_rejected() { + let (_dir, path) = save_for_mutation(); + let cfg_path = find_config_blob(&path); + let huge = vec![b'a'; (1024 * 1024 + 16) as usize]; + std::fs::write(&cfg_path, &huge).unwrap(); + rewrite_manifest(&path, |m| { + m["config"]["size"] = Value::from(huge.len() as u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "exceeds maximum allowed"); +} + +#[test] +fn memory_size_too_large_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + // 16 GiB exceeds MAX_MEMORY_SIZE. + cfg["memory_size"] = Value::from(16u64 * 1024 * 1024 * 1024); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "memory_size"); +} + +#[test] +fn snapshot_descriptor_size_disagrees_with_file_rejected() { + // Snapshot descriptor claims a different size than the actual + // blob file. The loader must reject before mmap-ing. + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + let sz = m["layers"][0]["size"].as_u64().unwrap(); + m["layers"][0]["size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("snapshot blob size"), + "expected snapshot-blob descriptor disagreement error, got: {msg}" + ); +} + +// ============================================================================= +// `from_oci_unchecked` shares the same non-digest validators with +// `from_oci`. The key safety claim of the unchecked path is that it +// is faster, NOT that it is more permissive about anything other +// than digest checks. 
Pin that contract down here. +// ============================================================================= + +#[test] +fn from_oci_unchecked_validates_tag_format() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "bad/tag")); + assert_err_contains(err, "tag"); +} + +#[test] +fn from_oci_unchecked_rejects_unknown_tag() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "nosuch")); + assert_err_contains(err, "no manifest tagged"); +} + +#[test] +fn from_oci_unchecked_rejects_path_not_directory() { + let dir = tempfile::tempdir().unwrap(); + let file_path = dir.path().join("not-a-dir"); + std::fs::write(&file_path, b"hi").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&file_path, "latest")); + assert_err_contains(err, "not a directory"); +} + +#[test] +fn from_oci_unchecked_rejects_missing_oci_layout_marker() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + std::fs::remove_file(path.join("oci-layout")).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "oci-layout"); +} + +// ============================================================================= +// Round-trip data fidelity. +// +// The serde shape tests already prove individual fields parse, but +// they don't prove that all the values that came out of the producer +// reach the loaded snapshot. 
These tests pin down full round-trip +// fidelity for fields that are not exercised by the +// "load-then-call-the-guest" round-trip tests above. +// ============================================================================= + +#[test] +fn round_trip_preserves_stack_top_gva() { + let mut sbox = create_test_sandbox(); + let snap = sbox.snapshot().unwrap(); + let original = snap.stack_top_gva(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.stack_top_gva(), original); +} + +#[test] +fn round_trip_preserves_non_default_scratch_size() { + use crate::sandbox::SandboxConfiguration; + let mut cfg = SandboxConfiguration::default(); + let custom_scratch: usize = 256 * 1024; + cfg.set_scratch_size(custom_scratch); + let snap = Snapshot::from_env( + GuestBinary::FilePath(simple_guest_as_string().unwrap()), + cfg, + ) + .unwrap(); + let original = snap.layout().get_scratch_size(); + assert_eq!(original, custom_scratch); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.layout().get_scratch_size(), custom_scratch); +} + +#[test] +fn pre_init_snapshot_writes_initialise_entrypoint_kind() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + assert_eq!(cfg["entrypoint"]["kind"].as_str().unwrap(), "initialise"); + assert!( + cfg["entrypoint"].get("sregs").is_none(), + "Initialise snapshot must not carry sregs in the config" + ); +} + +#[test] +fn already_initialised_snapshot_writes_call_entrypoint_kind() { + let mut sbox = create_test_sandbox(); + let 
snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + assert_eq!(cfg["entrypoint"]["kind"].as_str().unwrap(), "call"); + assert!( + cfg["entrypoint"]["sregs"].is_object(), + "Call snapshot must carry sregs in the config" + ); +} + +#[test] +fn round_trip_preserves_host_function_signatures() { + // Save a snapshot with a custom host function signature, load + // it, and confirm the recorded signatures survive. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + let funcs = cfg["host_functions"].as_array().unwrap(); + let add = funcs + .iter() + .find(|f| f["function_name"].as_str().unwrap() == "Add") + .expect("Add must be recorded"); + assert_eq!( + add["parameter_types"].as_array().unwrap().len(), + 2, + "Add signature must record two parameters" + ); + // Loading and using the snapshot must accept the same signature. + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let _ = MultiUseSandbox::from_snapshot(Arc::new(loaded), host_funcs_with_matching_add(), None) + .unwrap(); +} + +#[test] +fn snapshot_with_no_host_functions_round_trips() { + // A snapshot with `host_functions: []` must round-trip without + // confusing the loader (which has special handling for the + // empty-vs-None case). 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + assert!( + cfg["host_functions"].as_array().unwrap().is_empty(), + "expected empty host_functions array for pre-init snapshot" + ); + + // The default HostFunctions set is sufficient because the + // snapshot requires nothing. + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let _ = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); +} + +// ============================================================================= +// Snapshot lineage and restore semantics. +// +// Hyperlight's snapshot model is NOT a tree. Neither a `MultiUseSandbox` +// nor a `Snapshot` carries an identity: `restore(snap)` accepts any +// snapshot whose memory layout is compatible with the sandbox and +// whose required host functions are all registered on it. So +// sandboxes built from clones of the same `Arc<Snapshot>`, and the +// snapshots they go on to take, form a flat compatibility class +// within which restore is freely interchangeable. +// +// These tests pin down all the combinations of build-from-snapshot, +// take-more-snapshots, restore-out-of-order, and reject-across-class +// that follow from that model. +// ============================================================================= + +#[test] +fn linear_chain_restore_in_order() { + // Take three snapshots at different states in one sandbox, then + // restore to each in chronological order. After each restore, + // the static counter must read the value it had when that + // snapshot was taken.
+ let mut sbox = create_test_sandbox(); + let s0 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 10i32).unwrap(); + let s10 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 20i32).unwrap(); + let s30 = sbox.snapshot().unwrap(); + + sbox.restore(s0.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + sbox.restore(s10.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 10); + sbox.restore(s30.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 30); +} + +#[test] +fn linear_chain_restore_out_of_order() { + // Restore through the same chain but in a non-monotonic order + // (forward, back, forward, back). Snapshots within one + // compatibility class are NOT ordered by when they were + // taken: any of them can be restored after any other. + let mut sbox = create_test_sandbox(); + let s0 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 7i32).unwrap(); + let s7 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 100i32).unwrap(); + let s107 = sbox.snapshot().unwrap(); + + let order = [&s107, &s0, &s7, &s107, &s0]; + let expected = [107, 0, 7, 107, 0]; + for (snap, want) in order.iter().zip(expected.iter()) { + sbox.restore((*snap).clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), *want); + } +} + +#[test] +fn restore_then_call_then_snapshot_then_restore() { + // Restore changes the live state, but it must NOT invalidate + // the snapshot that was just used. After restoring to `s_init`, the + // sandbox can still take a new snapshot and restore back to + // either `s_init` or the new one. + let mut sbox = create_test_sandbox(); + let s_init = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 4i32).unwrap(); + + // Restore back to init. + sbox.restore(s_init.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + + // Mutate again, snapshot, mutate further.
+ sbox.call::("AddToStatic", 9i32).unwrap(); + let s_post_restore = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 100i32).unwrap(); + + // Restore to either reachable snapshot. + sbox.restore(s_post_restore.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 9); + sbox.restore(s_init.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn restore_idempotent() { + // Restoring to the same snapshot twice in a row must produce + // the same observable state both times. + let mut sbox = create_test_sandbox(); + sbox.call::("AddToStatic", 11i32).unwrap(); + let s = sbox.snapshot().unwrap(); + + sbox.call::("AddToStatic", 22i32).unwrap(); + sbox.restore(s.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 11); + + // No mutation between restores. + sbox.restore(s.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 11); + + // Mutation after the second restore must take effect. + sbox.call::("AddToStatic", 1i32).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 12); +} + +#[test] +fn from_snapshot_then_snapshot_then_restore_to_both() { + // Build sandbox B from snapshot S0, then have B take its own + // snapshot S1. Neither snapshot carries an identity tag, so both + // S0 and S1 must be reachable from B via `restore`. + // + // Note: only snapshots taken from a RUNNING sandbox (with + // sregs) are valid restore targets. We therefore start from a + // snapshot of a running sandbox, not a pre-init snapshot. + let mut seed = create_test_sandbox(); + let s0 = seed.snapshot().unwrap(); + + let mut b = MultiUseSandbox::from_snapshot(s0.clone(), HostFunctions::default(), None).unwrap(); + b.call::("AddToStatic", 5i32).unwrap(); + let s1 = b.snapshot().unwrap(); + b.call::("AddToStatic", 10i32).unwrap(); + + // Restore back to S1.
+ b.restore(s1.clone()).unwrap(); + assert_eq!(b.call::("GetStatic", ()).unwrap(), 5); + + // Restore back further to the constructor snapshot S0. + b.restore(s0.clone()).unwrap(); + assert_eq!(b.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn arc_clone_lineage_two_sandboxes_each_restores_to_either() { + // Two sandboxes built from the SAME `Arc<Snapshot>` share a + // memory layout and host-function set. Each takes its own + // snapshot. Each must be restorable to (a) its own derived + // snapshot, (b) the shared root snapshot, and (c) the OTHER + // sandbox's derived snapshot (because all three snapshots are + // compatible with both sandboxes). + // + // Note: the shared root must be a running-sandbox snapshot so + // that restore() can use its sregs. + let mut seed = create_test_sandbox(); + let snap_root = seed.snapshot().unwrap(); + + let mut a = + MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap(); + let mut b = + MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap(); + + a.call::("AddToStatic", 3i32).unwrap(); + let snap_a = a.snapshot().unwrap(); + + b.call::("AddToStatic", 70i32).unwrap(); + let snap_b = b.snapshot().unwrap(); + + // a: own snap then root then b's snap. + a.restore(snap_a.clone()).unwrap(); + assert_eq!(a.call::("GetStatic", ()).unwrap(), 3); + a.restore(snap_root.clone()).unwrap(); + assert_eq!(a.call::("GetStatic", ()).unwrap(), 0); + a.restore(snap_b.clone()).unwrap(); + assert_eq!(a.call::("GetStatic", ()).unwrap(), 70); + + // b: cross-restore the other way. + b.restore(snap_a.clone()).unwrap(); + assert_eq!(b.call::("GetStatic", ()).unwrap(), 3); + b.restore(snap_root.clone()).unwrap(); + assert_eq!(b.call::("GetStatic", ()).unwrap(), 0); + b.restore(snap_b.clone()).unwrap(); + assert_eq!(b.call::("GetStatic", ()).unwrap(), 70); +} + +#[test] +fn separate_from_snapshot_calls_share_id_class_through_lineage() { + // Build sandbox A from a running-sandbox snapshot snap_root. + // A takes snap_a.
Then build sandbox B from snap_a (a different + // `Arc<Snapshot>` derived from the same lineage). + // B must be restorable to BOTH snap_a and snap_root because + // both are layout- and host-function-compatible with B. + let mut seed = create_test_sandbox(); + let snap_root = seed.snapshot().unwrap(); + + let mut a = + MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap(); + a.call::("AddToStatic", 5i32).unwrap(); + let snap_a = a.snapshot().unwrap(); + + let mut b = + MultiUseSandbox::from_snapshot(snap_a.clone(), HostFunctions::default(), None).unwrap(); + b.restore(snap_a.clone()).unwrap(); + assert_eq!(b.call::("GetStatic", ()).unwrap(), 5); + b.restore(snap_root.clone()).unwrap(); + assert_eq!(b.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn separate_oci_loads_are_mutually_restore_compatible() { + // Each `from_oci` call rehydrates a structurally identical + // snapshot. Compatibility is determined by memory layout and + // host-function set, so a sandbox built from one load accepts + // a snapshot from any other load of the same image. + let mut seed = create_test_sandbox(); + let snap = seed.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "v1").unwrap(); + + let s_x = Arc::new(Snapshot::from_oci(&path, "v1").unwrap()); + let s_y = Arc::new(Snapshot::from_oci(&path, "v1").unwrap()); + + let mut sbox_x = + MultiUseSandbox::from_snapshot(s_x.clone(), HostFunctions::default(), None).unwrap(); + sbox_x.restore(s_y.clone()).unwrap(); + assert_eq!(sbox_x.call::("GetStatic", ()).unwrap(), 0); + + sbox_x.restore(s_x.clone()).unwrap(); + assert_eq!(sbox_x.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn oci_loaded_snapshot_supports_full_lifecycle() { + // Full round-trip: save (from a running sandbox so the loaded + // snapshot is a valid restore target), load, build sandbox, + // mutate, snapshot, mutate, restore, mutate, snapshot, restore.
+ // Both the loaded snapshot and the snapshots taken after loading + // must remain restore-compatible across an arbitrary number of + // cycles. + let mut seed = create_test_sandbox(); + let snap = seed.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "v1").unwrap(); + + let loaded = Arc::new(Snapshot::from_oci(&path, "v1").unwrap()); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox.call::("AddToStatic", 1i32).unwrap(); + let s1 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 2i32).unwrap(); + let s3 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 4i32).unwrap(); + + sbox.restore(s1.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 1); + sbox.restore(s3.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 3); + sbox.restore(loaded.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + + // Take a fresh snapshot post-restore. It is in the same + // compatibility class and remains interchangeable with the others. + let s_post = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 50i32).unwrap(); + sbox.restore(s_post.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + sbox.restore(s3.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 3); +} + +#[test] +fn restore_invariant_under_intermediate_mutations() { + // Restoring to S followed by an arbitrary number of + // mutate-then-restore cycles must always produce the same + // post-restore observable state. This is the core "snapshot + // and restore really mean what they say" property.
+ let mut sbox = create_test_sandbox(); + sbox.call::("AddToStatic", 13i32).unwrap(); + let s = sbox.snapshot().unwrap(); + + let mutations = [3, 5, 7, 11, 13, 17, 19]; + for m in mutations { + sbox.call::("AddToStatic", m).unwrap(); + sbox.restore(s.clone()).unwrap(); + assert_eq!( + sbox.call::("GetStatic", ()).unwrap(), + 13, + "restore must reset to the snapshotted value regardless of intermediate mutation {m}" + ); + } +} + +#[test] +fn many_arc_clones_one_snapshot_share_id() { + // Cloning an `Arc<Snapshot>` N times yields N references to + // the same snapshot. Each sandbox built from a clone has the + // same memory layout and is mutually restore-compatible. + // Verifies that the compatibility-class semantics hold for + // arbitrary fan-out. + // + // The shared root must be a running-sandbox snapshot so the + // sandboxes can restore to it. + let mut seed = create_test_sandbox(); + let snap = seed.snapshot().unwrap(); + let mut sandboxes: Vec = (0..4) + .map(|_| { + MultiUseSandbox::from_snapshot(snap.clone(), HostFunctions::default(), None).unwrap() + }) + .collect(); + + // Each sandbox takes its own derived snapshot whose static + // counter holds a unique value. + let mut snaps: Vec> = Vec::new(); + for (i, s) in sandboxes.iter_mut().enumerate() { + s.call::("AddToStatic", (i as i32 + 1) * 10).unwrap(); + snaps.push(s.snapshot().unwrap()); + } + + // Every sandbox can restore to every snapshot in the class. + for (i, sbox) in sandboxes.iter_mut().enumerate() { + for (j, target) in snaps.iter().enumerate() { + sbox.restore(target.clone()).unwrap(); + let want = (j as i32 + 1) * 10; + assert_eq!( + sbox.call::("GetStatic", ()).unwrap(), + want, + "sandbox {i} restored to snapshot {j} should observe value {want}" + ); + } + // And to the root snapshot.
+ sbox.restore(snap.clone()).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + } +} From c4aec72b1bd7f0ddb8c2c4a70a512cbb77685868 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:54:14 -0700 Subject: [PATCH 4/5] Add OCI snapshot benchmarks Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/benches/benchmarks.rs | 143 +++++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index 462e8908d..0f9ca5b2a 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -153,6 +153,15 @@ fn sandbox_lifecycle_benchmark(c: &mut Criterion) { ); } + // Isolates the cost of building a MultiUseSandbox from an + // already-resident Snapshot. The Snapshot is loaded outside the + // timed region. + for size in SandboxSize::all() { + group.bench_function(format!("sandbox_from_snapshot/{}", size.name()), |b| { + bench_sandbox_from_snapshot(b, size) + }); + } + group.finish(); } @@ -347,6 +356,25 @@ fn bench_snapshot_restore(b: &mut criterion::Bencher, size: SandboxSize) { }); } +fn bench_sandbox_from_snapshot(b: &mut criterion::Bencher, size: SandboxSize) { + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bench"); + { + let mut sbox = create_multiuse_sandbox_with_size(size); + let snapshot = sbox.snapshot().unwrap(); + snapshot.to_oci(&snap_path, "latest").unwrap(); + } + let loaded = std::sync::Arc::new(Snapshot::from_oci(&snap_path, "latest").unwrap()); + + b.iter(|| { + let _ = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + }); +} + fn snapshots_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("snapshots"); @@ 
-551,6 +579,118 @@ fn shared_memory_benchmark(c: &mut Criterion) { group.finish(); } +// ============================================================================ +// Benchmark Category: Snapshot Files +// ============================================================================ + +fn snapshot_file_benchmark(c: &mut Criterion) { + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + let mut group = c.benchmark_group("snapshot_files"); + + // Pre-create OCI snapshot images for all sizes. + let dirs: Vec<_> = SandboxSize::all() + .iter() + .map(|size| { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join(size.name()); + let snapshot = { + let mut sbox = create_multiuse_sandbox_with_size(*size); + sbox.snapshot().unwrap() + }; + snapshot.to_oci(&snap_path, "latest").unwrap(); + (dir, snapshot, snap_path) + }) + .collect(); + + // Benchmark: save_snapshot. Wipe the layout between iterations + // so each save measures a fresh write rather than a tag-append. + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_dir = tempfile::tempdir().unwrap(); + let path = snap_dir.path().join("bench"); + let snapshot = &dirs[i].1; + group.bench_function(format!("save_snapshot/{}", size.name()), |b| { + b.iter_batched( + || { + let _ = std::fs::remove_dir_all(&path); + }, + |_| snapshot.to_oci(&path, "latest").unwrap(), + criterion::BatchSize::PerIteration, + ); + }); + } + + // Benchmark: load_snapshot (parse manifest + config + mmap blob). + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function(format!("load_snapshot/{}", size.name()), |b| { + b.iter(|| { + let _ = Snapshot::from_oci(&snap_path, "latest").unwrap(); + }); + }); + } + + // Benchmark: load_snapshot_unchecked (skip blob digest verification). 
+ for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function(format!("load_snapshot_unchecked/{}", size.name()), |b| { + b.iter(|| { + let _ = Snapshot::from_oci_unchecked(&snap_path, "latest").unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_evolve (new + evolve + call) + for size in SandboxSize::all() { + group.bench_function(format!("cold_start_via_evolve/{}", size.name()), |b| { + b.iter(|| { + let mut sbox = create_multiuse_sandbox_with_size(size); + sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot (load + from_snapshot + call) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function(format!("cold_start_via_snapshot/{}", size.name()), |b| { + b.iter(|| { + let loaded = Snapshot::from_oci(&snap_path, "latest").unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot_unchecked (load unchecked + from_snapshot + call) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function( + format!("cold_start_via_snapshot_unchecked/{}", size.name()), + |b| { + b.iter(|| { + let loaded = Snapshot::from_oci_unchecked(&snap_path, "latest").unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }, + ); + } + + group.finish(); +} + criterion_group! { name = benches; config = Criterion::default(); @@ -561,6 +701,7 @@ criterion_group! 
{ guest_call_benchmark_large_param, function_call_serialization_benchmark, sample_workloads_benchmark, - shared_memory_benchmark + shared_memory_benchmark, + snapshot_file_benchmark } criterion_main!(benches); From db6f7fb25ebfde3e64a936647f496204ec76c846 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 4 Jun 2026 09:58:01 -0700 Subject: [PATCH 5/5] WIP: snapshot golden tests scaffolding Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .github/workflows/RegenSnapshotGoldens.yml | 148 ++++++ .github/workflows/ValidatePullRequest.yml | 2 +- Cargo.lock | 32 ++ Justfile | 49 ++ docs/snapshot-golden-tests-plan.md | 488 ++++++++++++++++++ docs/snapshot-versioning.md | 230 +++++++++ src/hyperlight_host/Cargo.toml | 6 + .../src/sandbox/snapshot/file.rs | 8 +- .../src/sandbox/snapshot/file_tests.rs | 167 ++++++ .../src/sandbox/snapshot/mod.rs | 3 +- .../src/sandbox/snapshot/tripwires.rs | 102 ++++ src/hyperlight_host/tests/integration_test.rs | 4 +- .../tests/snapshot_goldens/checks.rs | 346 +++++++++++++ .../tests/snapshot_goldens/fixtures.rs | 140 +++++ .../tests/snapshot_goldens/main.rs | 123 +++++ .../tests/snapshot_goldens/oci.rs | 54 ++ .../tests/snapshot_goldens/platform.rs | 188 +++++++ src/tests/rust_guests/simpleguest/src/main.rs | 143 +++++ 18 files changed, 2225 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/RegenSnapshotGoldens.yml create mode 100644 docs/snapshot-golden-tests-plan.md create mode 100644 docs/snapshot-versioning.md create mode 100644 src/hyperlight_host/src/sandbox/snapshot/tripwires.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/checks.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/fixtures.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/main.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/oci.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/platform.rs 
diff --git a/.github/workflows/RegenSnapshotGoldens.yml b/.github/workflows/RegenSnapshotGoldens.yml new file mode 100644 index 000000000..3c192d017 --- /dev/null +++ b/.github/workflows/RegenSnapshotGoldens.yml @@ -0,0 +1,148 @@ +# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json + +# Regenerate snapshot goldens stored at +# ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens. +# +# Run order: +# +# 1. Bump `GOLDENS_VERSION` in +# `src/hyperlight_host/tests/snapshot_goldens/platform.rs` and any +# matching constants in +# `src/hyperlight_host/src/sandbox/snapshot/tripwires.rs`. +# 2. Push the bump on a branch. +# 3. Dispatch this workflow against the branch, passing the same +# version string as the `version` input. +# +# The workflow walks every (hv, cpu, config) cell, dumps the +# canonical init+call snapshots locally, then `oras copy`s each as +# its own GHCR tag named +# `{version}-{hv}-{cpu}-{profile}-{kind}`. + +name: Regenerate Snapshot Goldens + +on: + workflow_dispatch: + inputs: + version: + description: Goldens version string. Must match GOLDENS_VERSION in source (e.g. "v1"). 
+ required: true + type: string + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: full + GHCR_IMAGE: ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens + +permissions: + contents: read + packages: write + +defaults: + run: + shell: bash + +jobs: + build-guests: + strategy: + matrix: + config: [debug, release] + uses: ./.github/workflows/dep_build_guests.yml + with: + config: ${{ matrix.config }} + secrets: inherit + + dump-and-push: + needs: build-guests + strategy: + fail-fast: false + matrix: + hypervisor: [kvm, mshv3, hyperv-ws2025] + cpu: [amd, intel] + config: [debug, release] + runs-on: ${{ fromJson( + format('["self-hosted", "{0}", "X64", "1ES.Pool=hld-{1}-{2}", "JobId=regen-goldens-{3}-{4}-{5}-{6}"]', + matrix.hypervisor == 'hyperv-ws2025' && 'Windows' || 'Linux', + matrix.hypervisor == 'hyperv-ws2025' && 'win2025' || matrix.hypervisor == 'mshv3' && 'azlinux3-mshv' || matrix.hypervisor, + matrix.cpu, + matrix.config, + github.run_id, + github.run_number, + github.run_attempt)) }} + steps: + - uses: actions/checkout@v6 + + - uses: hyperlight-dev/ci-setup-workflow@v1.9.0 + with: + rust-toolchain: "1.89" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Fix cargo home permissions + if: runner.os == 'Linux' + run: sudo chown -R $(id -u):$(id -g) /opt/cargo || true + + - name: Download Rust guests + uses: actions/download-artifact@v7 + with: + name: rust-guests-${{ matrix.config }} + path: src/tests/rust_guests/bin/${{ matrix.config }}/ + + - name: Install oras + run: | + set -euo pipefail + if command -v oras >/dev/null 2>&1; then + echo "oras already installed: $(oras version)" + exit 0 + fi + ORAS_VERSION=1.2.2 + if [ "${{ runner.os }}" = "Windows" ]; then + curl -sSLO "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_windows_amd64.zip" + unzip -q "oras_${ORAS_VERSION}_windows_amd64.zip" -d oras_install + mv oras_install/oras.exe /usr/bin/oras.exe + else + curl -sSLO 
"https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz" + mkdir -p oras_install + tar -xzf "oras_${ORAS_VERSION}_linux_amd64.tar.gz" -C oras_install + sudo install -m 0755 oras_install/oras /usr/local/bin/oras + fi + oras version + + - name: Verify GOLDENS_VERSION matches input + run: | + set -euo pipefail + IN_SRC=$(grep -oE 'GOLDENS_VERSION: &str = "[^"]+"' src/hyperlight_host/tests/snapshot_goldens/platform.rs | head -n1 | sed -E 's/.*"([^"]+)".*/\1/') + echo "GOLDENS_VERSION in source: ${IN_SRC}" + echo "version input: ${{ inputs.version }}" + if [ "${IN_SRC}" != "${{ inputs.version }}" ]; then + echo "::error::version input does not match GOLDENS_VERSION in source" + exit 1 + fi + + - name: Generate snapshots + run: | + set -euo pipefail + OUT="${{ github.workspace }}/snapshot-goldens-out" + mkdir -p "$OUT" + PROFILE_FLAG=$([ "${{ matrix.config }}" = "release" ] && echo "--release" || echo "") + FEATURES=$([ "${{ matrix.hypervisor }}" = "mshv3" ] && echo "mshv3" || echo "kvm") + if [ "${{ runner.os }}" = "Windows" ]; then FEATURES=""; fi + cargo test $PROFILE_FLAG -p hyperlight-host \ + ${FEATURES:+--no-default-features --features "$FEATURES,build-metadata"} \ + --test snapshot_goldens -- generate "$OUT" + ls -la "$OUT" + + - name: Log in to GHCR + run: | + echo "${{ secrets.GITHUB_TOKEN }}" | oras login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Push goldens to GHCR + run: | + set -euo pipefail + OUT="${{ github.workspace }}/snapshot-goldens-out" + for layout in "$OUT"/*/; do + tag=$(basename "$layout") + tag=${tag%/} + echo "::group::push ${tag}" + oras copy --from-oci-layout "${layout%/}:${tag}" "${GHCR_IMAGE}:${tag}" + echo "::endgroup::" + done diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml index 4ab524674..34058a415 100644 --- a/.github/workflows/ValidatePullRequest.yml +++ b/.github/workflows/ValidatePullRequest.yml @@ -89,7 
+89,7 @@ jobs: # See: https://github.com/actions/runner/issues/2205 if: ${{ !cancelled() && !failure() }} strategy: - fail-fast: true + fail-fast: false matrix: hypervisor: ['hyperv-ws2025', mshv3, kvm] cpu: [amd, intel] diff --git a/Cargo.lock b/Cargo.lock index 6bce49fc9..ce860cd2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -504,6 +504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -518,6 +519,18 @@ dependencies = [ "strsim", ] +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "clap_lex" version = "1.0.0" @@ -947,6 +960,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "euclid" version = "0.22.13" @@ -1700,6 +1719,7 @@ dependencies = [ "kvm-ioctls", "lazy_static", "libc", + "libtest-mimic", "log", "metrics", "metrics-exporter-prometheus", @@ -2156,6 +2176,18 @@ dependencies = [ "libc", ] +[[package]] +name = "libtest-mimic" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" +dependencies = [ + "anstream 1.0.0", + "anstyle", + "clap", + "escape8259", +] + [[package]] name = "libz-sys" version = "1.1.23" diff --git a/Justfile b/Justfile index 401897425..ded6d9edf 100644 --- a/Justfile +++ b/Justfile @@ -568,3 +568,52 @@ install-vcpkg: install-flatbuffers-with-vcpkg: install-vcpkg cd ../vcpkg && ./vcpkg install flatbuffers || cd - + 
+################################### +### SNAPSHOT GOLDEN HELPERS ### +################################### +# Custom-harness test binary that verifies / regenerates snapshot +# goldens stored on an OCI registry. The test binary itself never +# touches the network: it reads only from +# target/snapshot-goldens-cache/{version}/{tag}/. Populating that +# cache is the job of `snapshot-goldens-pull`, which shells out to +# `oras` (install from https://oras.land). + +# Default OCI registry image (without tag) that hosts the goldens. +default-snapshot-goldens-image := "ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens" + +# Verify the local snapshots against the goldens for the current +# GOLDENS_VERSION. Run `snapshot-goldens-pull` first to populate +# the local cache; missing cache entries cause hard test failures +# (the harness does not skip). +snapshot-goldens target=default-target: + cargo test {{ if target == "release" { "--release" } else { "" } }} \ + -p hyperlight-host --test snapshot_goldens + +# Pull goldens for the local platform's two tags (init + call) +# from `image` into the on-disk cache used by `snapshot-goldens`. +# Auto-detects hypervisor and CPU vendor on Linux; pass +# `profile=release` to fetch the release-profile tags. 
+snapshot-goldens-pull image=default-snapshot-goldens-image profile="debug": + #!/usr/bin/env bash + set -euo pipefail + version=$(awk -F'"' '/GOLDENS_VERSION: &str =/{print $2; exit}' src/hyperlight_host/tests/snapshot_goldens/platform.rs) + hv=${HYPERLIGHT_GOLDENS_HV:-$([[ -e /dev/mshv ]] && echo mshv || ([[ -e /dev/kvm ]] && echo kvm))} + cpu=$(awk -F: '/vendor_id/{gsub(/ /,"",$2); print $2; exit}' /proc/cpuinfo \ + | sed 's/GenuineIntel/intel/;s/AuthenticAMD/amd/') + [[ -n "${hv:-}" && -n "${cpu:-}" ]] || { echo "snapshot-goldens-pull: could not detect hv/cpu (set HYPERLIGHT_GOLDENS_HV)" >&2; exit 1; } + for kind in init call; do + tag="${version}-${hv}-${cpu}-{{ profile }}-${kind}" + dir="target/snapshot-goldens-cache/${version}/${tag}" + mkdir -p "${dir}" + oras copy --to-oci-layout "{{ image }}:${tag}" "${dir}:${tag}" + done + +# Generate the canonical local snapshots into the cache that +# `snapshot-goldens` reads from. Locally, `snapshot-goldens-generate` +# followed by `snapshot-goldens` is a pure local round-trip with +# no registry involved. The regen workflow calls the harness +# directly with an explicit out-dir for staging. +snapshot-goldens-generate target=default-target: + cargo test {{ if target == "release" { "--release" } else { "" } }} \ + -p hyperlight-host --test snapshot_goldens -- generate diff --git a/docs/snapshot-golden-tests-plan.md b/docs/snapshot-golden-tests-plan.md new file mode 100644 index 000000000..f04766219 --- /dev/null +++ b/docs/snapshot-golden-tests-plan.md @@ -0,0 +1,488 @@ +# Snapshot golden tests: design and plan + +This document describes the design of golden snapshot tests for hyperlight's +on-disk snapshot file format (`Snapshot::to_file` / `Snapshot::from_file`). + +The goal is to catch silent breaks of the snapshot ABI before they ship. 
+Currently only two version numbers (`FormatVersion` for the header, and +`SNAPSHOT_ABI_VERSION` for the memory blob) defend the format, and both +require manual maintainer discipline to bump. Golden tests automate that +discipline. + +## Two tiers of tests + +### Tier 1: byte-layout assertions (no fixture, no hypervisor) + +Pure compile-time-style assertions over the on-disk struct layout. No +fixture file, no hypervisor required. Cheap, exhaustive, and never out of +date. + +Asserts: + +* Sizes of `RawPreamble`, `RawHeaderV1`, `RawSegmentRegister`, + `RawTableRegister`, `RawSregs`, `RawHashes`, `FIXED_PREFIX_SIZE`. +* Field offsets in every raw struct (caught by `offset_of!`). +* Constants: `SNAPSHOT_MAGIC`, `SNAPSHOT_ABI_VERSION`, + `FormatVersion::V1`, `ArchTag::*`, `HypervisorTag::*` discriminants. +* `NextAction` tag round-trip (Initialise -> 0, Call -> 1). +* `HyperlightPEB` field offsets and total size, + `GuestMemoryRegion::SERIALIZED_SIZE`. + +If any of these change without a deliberate update, Tier 1 fails. The +maintainer must either update the test (and bump version constants) or +back the change out. + +### Tier 2: golden fixture tests (one fixture per arch+hv) + +End-to-end load-and-call tests against checked-in `.hls.zst` snapshot +files. Catches everything Tier 1 cannot: + +* Memory blob ABI semantics (PEB contents, layout arithmetic on reload). +* Hash input order (a different concatenation order would change + `header_hash` even with same fields). +* Initialise-path entry convention. +* Call-path sregs apply convention. +* Dispatch convention. +* Hypercall port numbers. +* Guest-call wire format. +* Host-function flatbuffer schema. +* Page table relocation. +* Stack/BSS/heap captured contents. + +## ABI surfaces enumerated (A-P) + +Every distinct way a snapshot could break, labelled A-P. Each surface +is annotated with which test tier catches it and which fixture exercises +it. Missing a surface means a silent break would ship. + +### A. 
File header surface (Tier 1) + +Sizes, offsets, and discriminant values of: + +* `RawPreamble`, `RawHeaderV1`, `RawSregs`, `RawSegmentRegister`, + `RawTableRegister`, `RawHashes`, `FIXED_PREFIX_SIZE`. +* `SNAPSHOT_MAGIC`, `SNAPSHOT_ABI_VERSION`, `FormatVersion::V1`, + `ArchTag::*`, `HypervisorTag::*`, `NextAction` tag values. +* `HyperlightPEB` field offsets and total size, + `GuestMemoryRegion::SERIALIZED_SIZE`. + +A change here that doesn't bump `FormatVersion` (or `SNAPSHOT_ABI_VERSION` +for the PEB) is a silent break. Tier 1 catches it without a fixture. + +### B. Memory blob layout (Tier 2: any golden) + +Region order and arithmetic in `SandboxMemoryLayout`: + +``` +0x1000 Code (code_size, R+W+X) + PEB (1 page, R+W on default; R only on regular config) + Heap (heap_size, R+W or R+W+X with executable_heap) + InitData (init_data_size, init_data_permissions) +``` + +Plus the PT tail appended past the guest-visible KVM slot. + +Key arithmetic: + +* `peb_offset = code_size.next_multiple_of(4096)` +* `peb_address = BASE_ADDRESS + peb_offset` = `0x1000 + peb_offset` +* `guest_heap_buffer_offset = (peb_offset + size_of::<HyperlightPEB>()).next_multiple_of(4096)` +* `init_data_offset = (guest_heap_buffer_offset + heap_size).next_multiple_of(4096)` +* `scratch_base_gpa(scratch_size) = MAX_GPA - scratch_size + 1` +* `scratch_base_gva(scratch_size) = MAX_GVA - scratch_size + 1` + +If any of these change, the captured blob has data at the OLD addresses +but the reloaded layout expects the NEW addresses. Caught by any golden. + +### C. 
PEB contents (Tier 2: any golden) + +The bytes the host writes into the PEB region during `write_peb`: + +* `input_stack`: { size = input_data_size, ptr = scratch_base_gva(scratch_size) } +* `output_stack`: { size = output_data_size, ptr = scratch_base_gva + input_data_size } +* `init_data`: { size = unaligned_memory_size - init_data_offset, ptr = BASE_ADDRESS + init_data_offset } +* `guest_heap`: { size = heap_size, ptr = BASE_ADDRESS + guest_heap_buffer_offset } + +If field order, ptr/size encoding, or any address calculation changes, +the captured PEB bytes will be wrong on load. Caught by any golden that +calls a guest fn (which requires the PEB to be correct). + +### D. Hypercall constants (Tier 2: F1, F4, F5, F6, F7) + +From `OutBAction` in `outb.rs`: + +* Log = 99 +* CallFunction = 101 +* Abort = 102 +* DebugPrint = 103 + +If any of these port numbers change without an ABI bump, an old guest +blob issues `out 101` and the new host listens for `out 99`. Caught by +any golden that calls a host fn (`Add`, `PrintOutput`, etc.). + +### E. Initialise entry calling convention (Tier 2: F1, F2, F3) + +`vm.initialise` sets up registers per `hyperlight_vm/x86_64.rs::initialise()`: + +``` +RIP = entrypoint_addr (from snapshot) +RSP = stack_top_gva - 8 +RDI = peb_addr +RSI = seed +RDX = page_size +RCX = guest_log_filter +``` + +Guest's `generic_init` consumes these from the same registers. A change +without an ABI bump lands the guest in old code expecting old register +positions. Caught by any Initialise-kind golden. + +### F. Dispatch call calling convention (Tier 2: F4, F5, F6, F7, F8) + +`dispatch_call_from_host` jumps to a captured `entrypoint_addr` (RIP) +which is the dispatch function. Stack frame at top of stack from prior +captured RSP. Any change to the dispatch ABI breaks Call snapshots. + +### G. Guest-call wire format (Tier 2: F6) + +Flatbuffer schema for `FunctionCall` and `FunctionCallResult`. 
Args are +encoded as `ParameterValue` union with 9 variants: + +* hlint (i32), hluint (u32), hllong (i64), hlulong (u64), hlfloat (f32), + hldouble (f64), hlstring, hlbool, hlvecbytes + +Returns add `hlvoid` (10 return variants total). + +A schema change in any cell breaks that arg type. F6 exercises every +variant in both arg and return position via `EchoT(value) == value` +calls. + +### H. Host function signatures wire format (Tier 2: F7) + +`HostFunctionDetails` flatbuffer persisted on disk. Schema: + +``` +HostFunctionDefinition { + function_name: string, + parameters: [ParameterType], + return_type: ReturnType, +} +``` + +ParameterType is u8 enum with 9 values; ReturnType u8 with 10. Wrong +serialization causes `validate_host_functions` to reject a correct +registration on load. F7 exercises every primitive type as a host fn +parameter and return. + +### I. sregs apply convention (Tier 2: F4, F5, F7, F8) + +`apply_sregs(cr3, sregs)` overrides `sregs.cr3` with the new relocated +PT base, then writes everything else. Captured CR3 is intentionally NOT +used. The rest matters: + +* All 8 segment registers (cs/ds/es/fs/gs/ss/tr/ldt) including hidden + cache fields (unusable, type, granularity, db). +* GDT + IDT base/limit. +* CR0/CR2/CR4/CR8 control bits. +* EFER (LMA, LME, SCE, NX matter on x86_64). +* APIC base. +* interrupt_bitmap (32 bytes). + +Caught by any Call golden (resume requires sregs to apply correctly). + +### J. PT relocation (Tier 2: F4, F5) + +The captured page tables in the blob are relocated during `Snapshot::new` +to a new physical address, then on restore they're copied to scratch +and CR3 points there. If the PTE encoding, self-map, or relocation +arithmetic changes, walks fail. F5 (heap activity, may add PT pages +mid-execution) gives the most thorough exercise. + +### K. Init data round-trip (Tier 2: F2) + +`write_init_data` copies bytes verbatim into the init_data region. 
If +the region offset or permission bits get reinterpreted, init data lands +at the wrong address or gets the wrong protection. F2 exercises both: +known bytes round-trip + explicit `MemoryRegionFlags` permissions. + +### L. Layout-arithmetic on reload (Tier 2: F3) + +Reload reconstructs `SandboxMemoryLayout` via `SandboxMemoryLayout::new( +cfg, code_size, init_data_size, init_data_permissions)`, then calls +`set_pt_size` and `set_snapshot_size`. If any computation in `new` +changes (for example `min_scratch_size`), reload computes different +addresses than the captured blob has, even if the blob itself is +unchanged. F3 exercises this with non-default sizes for input/output/ +heap/scratch, where any silent shift in the arithmetic moves region +boundaries differently. + +### M. Stack contents (Tier 2: F4, F5, F7, F8) + +A `Call` snapshot is taken at the top of `dispatch_function` after a +halt. The stack contains return addresses pointing into guest code. If +those code addresses move, restore lands in the wrong instructions. Any +Call golden exercises this; F4 (after non-trivial guest call) is the +most basic case. + +### N. Static (BSS) state (Tier 2: F4, F8) + +`COUNTER` lives in BSS in the code region. Snapshot captures the bytes; +reload writes them back. Catches "BSS region moved" or "writes to BSS +not captured during snapshot". F4 sets COUNTER to 42 then snapshots and +asserts on reload. + +### O. Heap state (Tier 2: F5) + +Heap allocator is `LockedHeap` initialized at startup with +`(*peb_ptr).guest_heap.ptr` as its base. A change to the heap allocator +that affects the on-heap data structures would mean the captured +allocator state is invalid on resume. F5 allocates a `Vec` with a +known pattern, pins it in a static, snapshots, and asserts the bytes +read back identically. + +### P. Snapshot-then-snapshot chain (Tier 2: F8) + +After loading and running, taking another snapshot must produce a +working snapshot. 
Catches "load did not properly reconstruct invariants +needed by `snapshot()`". F8 generates this on the fly from F4 plus a +state mutation, snapshots that, reloads, asserts. + +## Fixture matrix + +Two checked-in fixtures per (arch, hypervisor). Each one bundles +every surface that needs end-to-end coverage for its snapshot kind. +Per-surface assertions live in many small `#[test]` functions +against the loaded sandbox, so a failure points to one specific +surface even though all tests share two underlying fixture files. + +### F-init: `init` + +* Snapshot kind: Initialise (preinit). No host fns. +* Build configuration: non-default `input_data_size`, + `output_data_size`, `heap_size`, `scratch_size`. Init-data blob + with a known byte pattern + explicit `MemoryRegionFlags::READ` + permissions. +* Tests against this fixture: + * `init_basic_call` (A, B, C, D, E, N): `GetStatic() == 0` after + re-running guest init. + * `init_data_round_trip` (K): guest reads init_data via + `ReadFromUserMemory(num, expected)`. + * `init_custom_layout_works` (L): `Echo("custom-layout") == + "custom-layout"`. + +### F-call: `call` + +* Snapshot kind: Call (mid-execution). +* Setup before snapshot: register the `HostEcho*` family, + `evolve()`, then `AddToStatic(42)`, + `AllocAndWritePattern(1024)`, then call each + `RoundTripHostT(initial_value)` once. +* Tests against this fixture: + * `call_captured_bss` (F, I, J, M, N): `GetStatic() == 42`. + * `call_captured_heap_pattern` (O): `ReadPattern() == + expected_bytes`. + * `call_guest_types_round_trip` (G): for each primitive `T` + (`i32`, `u32`, `i64`, `u64`, `f32`, `f64`, `bool`, `String`, + `Vec`, `()`), call `EchoT(value)` and assert it + round-trips. Includes extremes (i32::MIN/MAX, NaN, infinities, + empty String/Vec) plus `PrintElevenArgs(...)` for the + multi-arg path. 
+ * `call_host_round_trips` (H, D, G return path): for each + primitive `T`, register the matching `HostEchoT` host fn at + load time, then call `RoundTripHostT(value)` and assert it + round-trips through the host. + * `call_chained_snapshot` (P): load F-call, mutate state, take a + fresh snapshot, write to a tempfile, load it back, assert. + Generated on the fly; not checked in. + +### Coverage matrix + +| | F-init | F-call | +|---|--------|--------| +| A | ✓ load | ✓ load | +| B | ✓ | ✓ | +| C | ✓ | ✓ | +| D | ✓ | ✓ | +| E | ✓ | | +| F | | ✓ | +| G | | ✓ | +| H | | ✓ | +| I | | ✓ | +| J | | ✓ | +| K | ✓ | | +| L | ✓ | | +| M | | ✓ | +| N | ✓ | ✓ | +| O | | ✓ | +| P | | ✓ | + +## Type matrix + +Existing simpleguest functions covering each primitive arg/ret type: + +| Type | Existing | Need to add | +|---|---|---| +| `i32` | `Add`, `AddToStatic` | `EchoI32` for clean echo | +| `u32` | none | `EchoU32` | +| `i64` | none | `EchoI64` | +| `u64` | `GetDr0`, `SetDr0` | `EchoU64` | +| `f32` | `EchoFloat` ✓ | none | +| `f64` | `EchoDouble` ✓ | none | +| `bool` | `CheckMapped` (no bool arg) | `EchoBool` | +| `String` | `Echo` ✓ | none | +| `Vec` | `GetSizePrefixedBuffer` ✓ | none | +| `()` | `Spin`, `TriggerException` | optional `NoOp` | +| Multi-arg (1-11) | `PrintTwoArgs..PrintElevenArgs` ✓ | none | + +New simpleguest functions: + +```rust +#[guest_function("EchoI32")] fn echo_i32(v: i32) -> i32 { v } +#[guest_function("EchoU32")] fn echo_u32(v: u32) -> u32 { v } +#[guest_function("EchoI64")] fn echo_i64(v: i64) -> i64 { v } +#[guest_function("EchoU64")] fn echo_u64(v: u64) -> u64 { v } +#[guest_function("EchoBool")] fn echo_bool(v: bool) -> bool { v } +``` + +Round-trip via host (for F7): + +```rust +#[host_function("HostEchoI32")] fn host_echo_i32(v: i32) -> i32; +// ... one per primitive type +#[guest_function("RoundTripHostI32")] fn rt_i32(v: i32) -> Result { host_echo_i32(v) } +// ... 
one per primitive type +``` + +Heap allocation pattern (for F5): + +```rust +static mut HEAP_BUF: Option<Vec<u8>> = None; +#[guest_function("AllocAndWritePattern")] fn alloc_and_write(len: u64) { ... } +#[guest_function("ReadPattern")] fn read_pattern() -> Vec<u8> { ... } +``` + +## On-disk file layout + +``` +src/hyperlight_host/tests/snapshot_goldens/fixtures/ + init_kvm.hls + init_mshv.hls + init_whp.hls + call_kvm.hls + call_mshv.hls + call_whp.hls + README.md (regen instructions) +``` + +Per-HV: tests skip if no matching fixture for the local HV. CI matrices +ensure each HV is exercised by at least one job. + +Files are committed uncompressed. Each fixture is roughly 5 MB +(simpleguest with the kitchen-sink configuration). 2 fixtures × 3 HVs = +~30 MB total when fully populated. We start with KVM only (~10 MB) +and add HVs as CI exercises them. + +## Test scaffolding + +Per-HV skip pattern. A test runner per fixture, all sharing the same +shape: + +```rust +fn run_golden( + fixture_name: &str, + register_fns: impl FnOnce(&mut HostFunctions), + check: impl FnOnce(&mut MultiUseSandbox), +) { ... } +``` + +## Test scaffolding + +Lives next to the existing `file_tests.rs` and `abi_tripwires.rs` +in `src/hyperlight_host/src/sandbox/snapshot/golden_tests.rs`. As +a unit-test module it has access to `pub(crate)` items +(`Snapshot::from_env`, `Registerable`, etc.) without exposing new +public API. + +Per-HV skip pattern. A `load_golden(name, register_fns)` helper +detects the local hypervisor, looks for `{name}_{hv}.hls`, returns +`None` if missing so the test can `let Some(sbox) = ... else { +return };`. Tests that don't match the local HV silently skip. +CI matrices ensure each HV is exercised in at least one job. + +## Regeneration story + +A single `#[test]` named `golden_regen` rewrites the fixtures. +Gated behind `HYPERLIGHT_REGEN_GOLDENS`: + +* not set: skips silently. +* `=1`: writes any fixture that does not yet exist for the local + HV, refuses to overwrite existing ones. 
+* `=force`: same but overwrites. + +Run via: + +``` +HYPERLIGHT_REGEN_GOLDENS=1 cargo test -p hyperlight-host \ + --lib sandbox::snapshot::golden_tests::golden_regen -- --nocapture +``` + +Documentation in fixtures/README.md: when to regenerate (any change +to header constants, ABI version, PEB layout, layout arithmetic, +or guest startup), and the recipe to run. + +## Git considerations + +* Fixtures committed uncompressed. Git auto-detects them as + binary so diffs are clean. +* Regenerating golden snapshots is a separate, dedicated PR (not + bundled with the change that required the bump). Keeps the diff + focused. +* Do not use Git LFS. Total size is ~10 MB per HV today, ~30 MB + fully populated. Well below the threshold where LFS becomes + worthwhile. + +## Implementation order + +1. Tier 1 layout assertions (standalone, no fixtures). +2. Add EchoX, HostEchoX, RoundTripHostX guest functions to + simpleguest. +3. Test scaffolding (`load_golden`, `golden_regen`). +4. `init` + its tests. +5. `call` + its tests. + +## Known gaps (out of scope) + +* FPU/XMM register state: not in `CommonSpecialRegisters`. xsave is not + persisted in the snapshot today. If guest behaviour ever depends on + captured FPU state, the snapshot itself needs to grow that field. + This is not a versioning issue. +* MSRs: not captured. Today's guest does not depend on any MSR state + surviving restore (no SYSCALL, no FS/GS_BASE TLS, no perf counters). + If a future guest does, snapshot must grow MSR capture. +* PRNG seed: each load of an Initialise snapshot reseeds picolibc with + fresh randomness from the host. Call snapshots resume with the + captured PRNG state intact, so loading the same Call snapshot N + times in parallel produces N sandboxes that share the same `rand()` + sequence. Guest code that needs per-sandbox randomness from a Call + snapshot must reseed itself after restore. Snapshot format change + not required. 
+* TSC continuity: the guest's TSC view jumps forward (or backward, on + another machine) on restore. We do not capture + `MSR_IA32_TSC` and we do not write `MSR_IA32_TSC_OFFSET` on restore. + Guest code that uses TSC for absolute time will see a discontinuity. + Affects only the `trace_guest` feature today; would need to be + fixed if snapshots ever expose timing guarantees to guest code. +* Cross-HV-version portability: snapshots are not promised to load on + a different KVM/MSHV/WHP version than the one that produced them. + Sregs hidden-cache encoding can drift across hypervisor releases. We + do not tag the snapshot with the hypervisor version. Match Firecracker: + document the limitation, defer until a real failure mode appears. +* Cross-CPU-vendor portability: empirically loads cross-vendor on KVM + for the simpleguest workload (verified locally), but the snapshot + bytes differ in segment hidden-cache fields (cs.limit, segment.g). + We do not normalize sregs at write time. If a future guest depends + on those fields meaning what the originating vendor said they meant, + cross-vendor would break. Consider sregs normalization (Option A in + the issue tracker) if this becomes a real concern. +* Hypervisor-internal state like APIC pending interrupts. +* Cross-OS goldens: a Linux KVM snapshot loaded on a Linux+MSHV-only host + triggers the existing hypervisor-mismatch error path. Already covered + by `hypervisor_mismatch_rejected`. +* nanvix-unstable feature: deprecated, intentionally not covered. diff --git a/docs/snapshot-versioning.md b/docs/snapshot-versioning.md new file mode 100644 index 000000000..21fb677e7 --- /dev/null +++ b/docs/snapshot-versioning.md @@ -0,0 +1,230 @@ +# Snapshot versioning + +Hyperlight snapshots are written to disk as OCI image layouts and may be +loaded by a different build than the one that produced them. 
This +document describes how to evolve the snapshot format while keeping +existing snapshots loadable, or while rejecting them with a clear error. + +## What is versioned + +A snapshot carries three independently evolvable version markers: + +* **Memory blob ABI**, `SNAPSHOT_ABI_VERSION` (a `u32` inside the + config blob, defined in + [src/hyperlight_host/src/sandbox/snapshot/file.rs](../src/hyperlight_host/src/sandbox/snapshot/file.rs)). + This is the host/guest runtime contract baked into the captured + memory: the `HyperlightPEB` layout (the struct host and guest share + to exchange state, field offsets and types), the `OutBAction` port + numbers (the I/O ports the guest writes to for `Log`, `CallFunction`, + `Abort`, `DebugPrint`), the layout of the sandbox memory regions + (stack, heap, guest binary, input and output buffers, page tables), + and the calling convention used for guest function entry. The loader + trusts the captured bytes to match this contract, so any change here + invalidates older snapshots unless an explicit compat path translates + them. +* **Snapshot blob encoding**, `MT_SNAPSHOT_V1` + (`application/vnd.hyperlight.snapshot.v1`), aliased as + `MT_SNAPSHOT_CURRENT`. This is the on-wire format of the snapshot + blob: framing, section ordering, alignment, dirty/zero-page elision, + anything about how the bytes are packed inside the OCI layer. +* **Config schema**, `MT_CONFIG_V1` + (`application/vnd.hyperlight.sandbox.config.v1+json`), aliased as + `MT_CONFIG_CURRENT`. This is the JSON shape of the config blob: + field names, types, required vs optional, the descriptors the loader + needs in order to reconstruct the sandbox (memory sizes, buffer + sizes, `abi_version`, `hyperlight_version`, etc.). Renaming a field, + changing its type, or adding a required field is a schema change and + bumps this constant. + +The `OCI_LAYOUT_VERSION` constant is pinned by the OCI image-layout +spec at `1.0.0` and is not something Hyperlight revs. 
+ +The config blob also records `hyperlight_version`, the `CARGO_PKG_VERSION` +of the host crate at write time. This is informational only. The loader +records it for diagnostics and does not gate loading on it. + +## Enforcement + +The format is large and easy to change by accident. Two mechanisms +catch a change to it so reviewers do not have to spot every break by +eye, and so a developer who breaks the format unintentionally finds +out at build time rather than in production. + +Compile-time tripwires in +[src/hyperlight_host/src/sandbox/snapshot/tripwires.rs](../src/hyperlight_host/src/sandbox/snapshot/tripwires.rs) +hold a copy of every value that defines the format: +`SNAPSHOT_ABI_VERSION`, the snapshot and config media-type strings, the +OCI layout version, every `HyperlightPEB` field offset and the struct's +total size, and every `OutBAction` discriminant. If the source value +drifts from the copy in `tripwires.rs`, the crate fails to compile. + +The snapshot golden verify test +(`cargo test -p hyperlight-host --test snapshot_goldens`) loads +snapshots from a local cache (populated by `just snapshot-goldens-pull`, +which fetches the tag set for the current `GOLDENS_VERSION` from GHCR) +and runs them through the current loader. If the new loader cannot +decode the old bytes, the test fails. + +## Changing the format + +When you change anything on the list above, you have three options. + +### Option 1: avoid the break + +Restructure the change so the on-disk contract stays put. Prefer this +whenever possible. + +### Option 2: backwards-compatible break + +You break the ABI for new snapshots, and you teach the loader to +accept the older version as well by translating it into the current +contract on the fly. For example, if you renumber the `OutBAction` +ports, the host's port dispatch keeps a match arm for the old port +number alongside the new one, so a resumed v1 guest that still writes +to the old port is handled correctly. + +Steps: + +1. 
Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. The writer stamps this value into + every config blob it produces. +4. Update `Snapshot::from_oci` to load both the old and the new + format, dispatching on `abi_version`. +5. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +6. Bump `GOLDENS_VERSION` to the next major and push fresh goldens. See + [Goldens version numbering](#goldens-version-numbering) and + [Regenerating goldens](#regenerating-goldens). +7. Keep the old goldens on GHCR and extend the verify test to exercise + them as well, so the compatibility path stays covered. See + [Verifying multiple golden versions](#verifying-multiple-golden-versions). + +Old snapshots on disk continue to load. New snapshots use the new +contract. The compatibility path is now part of the supported surface +and must stay correct until you formally drop the old major. + +### Option 3: hard break + +You change the contract and the loader rejects old snapshots outright. +Using the same `OutBAction` example, the host's port dispatch only +matches on the new port number, and a resumed v1 guest writing to the +old port has nowhere to land. + +Steps: + +1. Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. +4. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +5. Bump `GOLDENS_VERSION` to the next major and push fresh goldens. See + [Goldens version numbering](#goldens-version-numbering) and + [Regenerating goldens](#regenerating-goldens). +6. Record the break in `CHANGELOG.md`. Anyone holding old snapshots on + disk has to regenerate them against the new build. + +The loader's single-version check enforces the rejection. An old +snapshot loaded against the new build fails the +`abi_version == SNAPSHOT_ABI_VERSION` test with a clear error. 
+ +## Regenerating goldens + +The verify test (`cargo test -p hyperlight-host --test snapshot_goldens`) +loads the tag set `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}-{kind}` from a +local cache that `just snapshot-goldens-pull` populates from GHCR. After +bumping `GOLDENS_VERSION`, the matching tags must be pushed before the +verify job can pass. + +### Iterating locally + +`just snapshot-goldens-generate` regenerates the cache for the current +`GOLDENS_VERSION` from the local source, so the verify test runs green +against your in-progress changes on your own platform. Use this loop +for iteration that does not need to cross hypervisor boundaries. To +validate the change on every platform, dispatch the regen workflow +(see [Push procedure](#push-procedure)). + +### Goldens version numbering + +`GOLDENS_VERSION` follows a `vMAJOR.MINOR` scheme. The tag set on GHCR +for a given version is keyed by the full string, so `v1.0`, `v1.1`, and +`v2.0` are independent namespaces that never collide. + +* Bump **MAJOR** when the snapshot ABI changes (Option 2 or Option 3 + above). The old tag set stays on GHCR untouched. +* Bump **MINOR** when the set of golden checks changes but the ABI does + not (for example, a new check is added). The new tag set contains + every check, including the unchanged ones, regenerated against the + current source. + +A version is frozen once `main` references it. The regen workflow, +before every push, reads `GOLDENS_VERSION` from the tip of `main` and +refuses to push to that tag. Any other tag, including the version the +current PR is introducing, is in-flight and may be overwritten freely. +This lets a developer iterate on a v1 to v2 bump by pushing v2 as many +times as needed, with no risk of touching v1. + +Overwriting a tag leaves the previous manifest on GHCR as an orphan. +A scheduled cleanup workflow that reaps orphans and abandoned in-flight +tags is a follow-up. + +### Push procedure + +1. Land the source bumps on a branch. +2. 
Dispatch the `Regenerate Snapshot Goldens` workflow against that + branch. The workflow walks every supported + `(hypervisor, cpu, profile)` combination on the self-hosted runner + pool, generates the canonical init and call snapshots locally with + `cargo test --test snapshot_goldens -- generate <out-dir>`, and pushes + each OCI layout to GHCR using `oras copy`. Before every push it + reads `GOLDENS_VERSION` from the tip of `main` and refuses the push + if the target tag matches. +3. The verify job on the PR can now find the tags and passes. + +The workflow takes a `version` input that must equal `GOLDENS_VERSION` +in source. This guards against pushing a tag set the test binary would +ignore. + +## Adding a new check under the current ABI + +Adding a new entry to `CHECKS` does not change the snapshot ABI. It +does change the set of tags the verify test expects, so it requires a +minor `GOLDENS_VERSION` bump. + +Steps: + +1. Add the entry to `CHECKS` in + `src/hyperlight_host/tests/snapshot_goldens/`. +2. Bump `GOLDENS_VERSION` minor (e.g. `v1.2` to `v1.3`). The verify + test now looks for tags under the new prefix and fails until they + exist. +3. A maintainer dispatches `Regenerate Snapshot Goldens` against the + branch with `version` set to the new `GOLDENS_VERSION`. The workflow + runs every check on every combination and publishes a complete tag set + under the new prefix. The previous tag set stays on GHCR untouched. +4. The verify job finds the new tag set and passes. + +The previous minor's tags can be deleted from GHCR once nothing depends +on them. They are not loaded by the verify test once `GOLDENS_VERSION` +has moved on. + +## Verifying multiple golden versions + +The verify test pulls exactly one tag set, the one for the current +`GOLDENS_VERSION`. That covers the hard-break case (Option 3), where a +fresh tag set replaces the previous one. + +The backwards-compatible case (Option 2) needs more. 
A v1 loader path +is only correct if real v1 goldens load against the new build, which +means verifying against multiple versions in the same run. + +The intended design is to replace the single `GOLDENS_VERSION` constant +with a slice of currently supported major versions, e.g. +`pub const GOLDENS_VERSIONS: &[&str] = &["v1.3", "v2.0"];`, and have +the verify test run every check against every entry. Dropping an old +major is then a one-line removal from that slice. + +This is a design sketch. The single-version variant suffices for +Option 3. Build the multi-version variant the first time you take +Option 2. diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index a774b5786..07dc7d2f2 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -110,6 +110,7 @@ metrics-util = "0.20.4" metrics-exporter-prometheus = { version = "0.18.3", default-features = false } serde_json = "1.0" hyperlight-component-macro = { workspace = true } +libtest-mimic = "0.8.2" [target.'cfg(windows)'.dev-dependencies] windows = { version = "0.62", features = [ @@ -148,3 +149,8 @@ guest-counter = ["hyperlight-common/guest-counter"] [[bench]] name = "benchmarks" harness = false + +[[test]] +name = "snapshot_goldens" +path = "tests/snapshot_goldens/main.rs" +harness = false diff --git a/src/hyperlight_host/src/sandbox/snapshot/file.rs b/src/hyperlight_host/src/sandbox/snapshot/file.rs index 7a4588b02..0a87a6eba 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file.rs @@ -92,7 +92,7 @@ use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; // --- Constants ------------------------------------------------------ -const OCI_LAYOUT_VERSION: &str = "1.0.0"; +pub(super) const OCI_LAYOUT_VERSION: &str = "1.0.0"; // Media types are versioned by suffix. 
The loader matches each // version specifically (no `_CURRENT` shortcut on the read side); the @@ -103,15 +103,15 @@ const OCI_LAYOUT_VERSION: &str = "1.0.0"; // 3. Add a dispatch arm in the loader that converts v1 -> v2 (or // rejects v1 if no compatibility window is offered). const MT_CONFIG_V1: &str = "application/vnd.hyperlight.sandbox.config.v1+json"; -const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; +pub(super) const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; const MT_SNAPSHOT_V1: &str = "application/vnd.hyperlight.snapshot.v1"; -const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; +pub(super) const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; /// ABI version for the snapshot memory blob. Bumped whenever the /// host-guest contract for the bytes inside the snapshot blob changes /// (PEB layout, calling convention, init state, etc.). Independent of /// the config blob's media-type version. -const SNAPSHOT_ABI_VERSION: u32 = 1; +pub(super) const SNAPSHOT_ABI_VERSION: u32 = 1; /// Maximum size of the config JSON blob. Bounds the allocation done /// before we parse the JSON. diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs index 171ff3779..7c04e92c6 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -2560,3 +2560,170 @@ fn many_arc_clones_one_snapshot_share_id() { assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); } } + +// ============================================================================= +// `from_snapshot` config plumbing. +// ============================================================================= +// +// `from_snapshot` accepts a caller-supplied `SandboxConfiguration`. +// Layout fields must be silently overridden by the snapshot (the +// on-disk memory blob already encodes those sizes). Runtime fields +// must take effect. 
+ +/// Layout fields supplied via `SandboxConfiguration` must be silently +/// overridden. The snapshot's own layout is authoritative. +#[test] +fn from_snapshot_silently_ignores_layout_overrides() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let original_input = snapshot.layout().input_data_size; + let original_output = snapshot.layout().output_data_size; + let original_heap = snapshot.layout().heap_size; + let original_scratch = snapshot.layout().get_scratch_size(); + + let mut config = SandboxConfiguration::default(); + config.set_input_data_size(original_input * 2); + config.set_output_data_size(original_output * 2); + config.set_heap_size((original_heap as u64) * 2); + config.set_scratch_size(original_scratch * 2); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot.clone(), HostFunctions::default(), Some(config)) + .unwrap(); + + sbox2.call::("GetStatic", ()).unwrap(); + + let new_snap = sbox2.snapshot().unwrap(); + assert_eq!(new_snap.layout().input_data_size, original_input); + assert_eq!(new_snap.layout().output_data_size, original_output); + assert_eq!(new_snap.layout().heap_size, original_heap); + assert_eq!(new_snap.layout().get_scratch_size(), original_scratch); +} + +/// `from_snapshot` honors `guest_core_dump=true` so that +/// `generate_crashdump_to_dir` writes a file. 
+#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_enabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(true); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + !entries.is_empty(), + "expected core dump file when guest_core_dump=true" + ); +} + +/// `from_snapshot` honors `guest_core_dump=false` so that +/// `generate_crashdump_to_dir` produces no file. +#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_disabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(false); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + entries.is_empty(), + "expected no core dump file when guest_core_dump=false, found {:?}", + entries.iter().map(|e| e.path()).collect::>() + ); +} + +/// Loading from OCI must reset `snapshot_generation` to 0, regardless +/// of what generation the source sandbox was at when it saved. 
+#[test] +fn snapshot_generation_resets_on_oci_load() { + let dir = tempfile::tempdir().unwrap(); + let oci_dir = dir.path().join("layout"); + + let mut sbox = create_test_sandbox(); + // Bump generation by taking + restoring a snapshot a few times. + for _ in 0..3 { + let s = sbox.snapshot().unwrap(); + sbox.restore(s).unwrap(); + } + let live = sbox.snapshot().unwrap(); + assert!( + live.snapshot_generation() > 0, + "expected nonzero generation after restore cycles" + ); + + live.to_oci(&oci_dir, "gen-reset").unwrap(); + let loaded = Snapshot::from_oci(&oci_dir, "gen-reset").unwrap(); + assert_eq!( + loaded.snapshot_generation(), + 0, + "snapshot_generation must reset to 0 on OCI load" + ); +} + +/// Non-default `init_data_permissions` survive an OCI round-trip +/// byte-for-byte. The default code path uses `READ`, so this pins +/// `READ | WRITE` instead. A regression in the permission +/// serialisation would silently downgrade or upgrade access to the +/// init_data region. +#[test] +fn round_trip_preserves_non_default_init_data_permissions() { + use crate::mem::memory_region::MemoryRegionFlags; + use crate::sandbox::SandboxConfiguration; + use crate::sandbox::uninitialized::{GuestBlob, GuestEnvironment}; + + let path = simple_guest_as_string().unwrap(); + let data: &[u8] = b"perm-pinned-init-data"; + let env = GuestEnvironment { + guest_binary: GuestBinary::FilePath(path), + init_data: Some(GuestBlob { + data, + permissions: MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, + }), + }; + let snap = Snapshot::from_env(env, SandboxConfiguration::default()).unwrap(); + let expected = snap.layout().init_data_permissions; + assert_eq!( + expected, + Some(MemoryRegionFlags::READ | MemoryRegionFlags::WRITE), + "fixture must produce non-default init_data_permissions", + ); + + let dir = tempfile::tempdir().unwrap(); + let oci_dir = dir.path().join("layout"); + snap.to_oci(&oci_dir, "perms").unwrap(); + let loaded = Snapshot::from_oci(&oci_dir, "perms").unwrap(); + 
assert_eq!(loaded.layout().init_data_permissions, expected); +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index 77647cada..f78841ccc 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -16,6 +16,7 @@ limitations under the License. mod file; mod file_tests; +mod tripwires; use std::collections::{BTreeMap, HashMap}; @@ -274,7 +275,7 @@ fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) { impl Snapshot { /// Create a new snapshot from the guest binary identified by `env`. With the configuration /// specified in `cfg`. - pub(crate) fn from_env<'a, 'b>( + pub fn from_env<'a, 'b>( env: impl Into>, cfg: SandboxConfiguration, ) -> Result { diff --git a/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs new file mode 100644 index 000000000..b8cb2ea0c --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs @@ -0,0 +1,102 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Compile-time tripwires for the snapshot ABI. +//! +//! Each assertion below pins one piece of the on-disk or in-memory +//! contract that snapshots depend on: the manifest media types, the +//! OCI Image Layout version, the `HyperlightPEB` field offsets, and +//! the `OutBAction` port numbers. A change to any of these means +//! 
snapshots produced by older builds can no longer be loaded +//! correctly by this build. +//! +//! When one of these assertions fires, the change is breaking the +//! snapshot ABI. The fix is one of: +//! +//! * Avoid the break entirely. Reshape the change so the on-disk +//! contract does not move. +//! * Make the change backwards compatible (add a versioned variant, +//! add a compatibility path in the loader) and leave the pinned +//! values here alone. +//! * Accept the break: bump [`super::file::SNAPSHOT_ABI_VERSION`] +//! together with `EXPECTED_ABI_VERSION` below, and update any +//! other `EXPECTED_*` constants here to match whatever the source +//! values now are. Snapshots produced by older builds will be +//! rejected at load time by the version check, so they must be +//! regenerated. Call this out in the release notes. + +use super::file::{ + MT_CONFIG_CURRENT, MT_SNAPSHOT_CURRENT, OCI_LAYOUT_VERSION, SNAPSHOT_ABI_VERSION, +}; + +const EXPECTED_ABI_VERSION: u32 = 1; +const EXPECTED_MT_CONFIG: &str = "application/vnd.hyperlight.sandbox.config.v1+json"; +const EXPECTED_MT_SNAPSHOT: &str = "application/vnd.hyperlight.snapshot.v1"; +const EXPECTED_OCI_LAYOUT_VERSION: &str = "1.0.0"; + +const _: () = { + assert!(SNAPSHOT_ABI_VERSION == EXPECTED_ABI_VERSION); + assert!(str_eq(MT_CONFIG_CURRENT, EXPECTED_MT_CONFIG)); + assert!(str_eq(MT_SNAPSHOT_CURRENT, EXPECTED_MT_SNAPSHOT)); + assert!(str_eq(OCI_LAYOUT_VERSION, EXPECTED_OCI_LAYOUT_VERSION)); +}; + +#[cfg(not(feature = "nanvix-unstable"))] +const _: () = { + use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB}; + assert!(std::mem::size_of::() == 16); + assert!(std::mem::size_of::() == 4 * 16); + assert!(std::mem::offset_of!(HyperlightPEB, input_stack) == 0); + assert!(std::mem::offset_of!(HyperlightPEB, output_stack) == 16); + assert!(std::mem::offset_of!(HyperlightPEB, init_data) == 32); + assert!(std::mem::offset_of!(HyperlightPEB, guest_heap) == 48); +}; + +#[cfg(feature = "nanvix-unstable")] 
+const _: () = { + use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB}; + assert!(std::mem::size_of::() == 16); + assert!(std::mem::size_of::() == 5 * 16); + assert!(std::mem::offset_of!(HyperlightPEB, input_stack) == 0); + assert!(std::mem::offset_of!(HyperlightPEB, output_stack) == 16); + assert!(std::mem::offset_of!(HyperlightPEB, init_data) == 32); + assert!(std::mem::offset_of!(HyperlightPEB, guest_heap) == 48); + assert!(std::mem::offset_of!(HyperlightPEB, file_mappings) == 64); +}; + +const _: () = { + use hyperlight_common::outb::OutBAction; + assert!(OutBAction::Log as u16 == 99); + assert!(OutBAction::CallFunction as u16 == 101); + assert!(OutBAction::Abort as u16 == 102); + assert!(OutBAction::DebugPrint as u16 == 103); +}; + +const fn str_eq(a: &str, b: &str) -> bool { + let a = a.as_bytes(); + let b = b.as_bytes(); + if a.len() != b.len() { + return false; + } + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + true +} diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index cc7b7587d..6c03bb123 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -535,7 +535,7 @@ fn guest_malloc_abort() { }); // allocate a vector (on heap) that is bigger than the heap - let heap_size = 0x4000; + let heap_size = 0x6000; let size_to_allocate = 0x10000; assert!( size_to_allocate > heap_size, @@ -616,7 +616,7 @@ fn corrupt_output_back_pointer_rejected() { #[test] fn guest_panic_no_alloc() { - let heap_size = 0x4000; + let heap_size = 0x6000; let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(heap_size); diff --git a/src/hyperlight_host/tests/snapshot_goldens/checks.rs b/src/hyperlight_host/tests/snapshot_goldens/checks.rs new file mode 100644 index 000000000..8a9559f2a --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/checks.rs @@ -0,0 +1,346 @@ +/* +Copyright 2025 The 
Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Functional checks against goldens loaded from the on-disk cache. +//! +//! Each check runs against a fresh `MultiUseSandbox` built from +//! the golden for `Check::kind`, so checks are independent and +//! one failure does not poison the next. +//! +//! Adding coverage: write a `fn(&mut MultiUseSandbox) -> Result<(), +//! String>` and add one row to `CHECKS`. + +use std::sync::Arc; + +use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::{HostFunctions, MultiUseSandbox}; + +use crate::fixtures::{CALL_COUNTER_BUMP, HEAP_PATTERN_LEN, INIT_DATA, register_host_echo_fns}; +use crate::platform::Kind; + +pub struct Check { + pub name: &'static str, + pub kind: Kind, + pub run: fn(&mut MultiUseSandbox) -> Result<(), String>, +} + +pub const CHECKS: &[Check] = &[ + Check { + name: "init/basic_call", + kind: Kind::Init, + run: init_basic_call, + }, + Check { + name: "init/data_round_trip", + kind: Kind::Init, + run: init_data_round_trip, + }, + Check { + name: "init/custom_layout_works", + kind: Kind::Init, + run: init_custom_layout_works, + }, + Check { + name: "call/captured_bss", + kind: Kind::Call, + run: call_captured_bss, + }, + Check { + name: "call/captured_heap_pattern", + kind: Kind::Call, + run: call_captured_heap_pattern, + }, + Check { + name: "call/guest_types_round_trip", + kind: Kind::Call, + run: call_guest_types_round_trip, + }, + Check { + name: "call/host_round_trips", + kind: 
Kind::Call, + run: call_host_round_trips, + }, + Check { + name: "call/chained_snapshot", + kind: Kind::Call, + run: call_chained_snapshot, + }, +]; + +// ----------------------------------------------------------------- +// init +// ----------------------------------------------------------------- + +/// Loaded init golden answers a basic call and observes a clean +/// BSS. Covers the header layout, layout arithmetic, PEB contents, +/// the dispatch port, the initialise entry convention, and BSS init. +fn init_basic_call(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let value: i32 = sbox + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic: {e}"))?; + if value != 0 { + return Err(format!("fresh init must observe BSS == 0, got {value}")); + } + Ok(()) +} + +/// `INIT_DATA` survives the snapshot round-trip with permissions +/// intact. The guest's `ReadFromUserMemory` returns the captured +/// bytes; a mismatch indicates silent corruption of the init_data +/// region. +fn init_data_round_trip(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let bytes: Vec = sbox + .call( + "ReadFromUserMemory", + (INIT_DATA.len() as u64, INIT_DATA.to_vec()), + ) + .map_err(|e| format!("ReadFromUserMemory: {e}"))?; + if bytes != INIT_DATA { + return Err(format!( + "captured init_data did not round-trip byte-for-byte (len={})", + bytes.len(), + )); + } + Ok(()) +} + +/// Any silent shift in `SandboxMemoryLayout::new` arithmetic with +/// the non-default sizes from `golden_config` would land the PEB or +/// scratch buffers at the wrong addresses; an `Echo` would then +/// fail. 
+fn init_custom_layout_works(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let got: String = sbox + .call("Echo", "custom-layout".to_string()) + .map_err(|e| format!("Echo: {e}"))?; + if got != "custom-layout" { + return Err(format!("Echo returned {got:?}")); + } + Ok(()) +} + +// ----------------------------------------------------------------- +// call +// ----------------------------------------------------------------- + +/// Captured BSS restores exactly: `COUNTER == CALL_COUNTER_BUMP`. +/// Covers the dispatch convention, sregs apply, page-table +/// relocation, captured stack/BSS. +fn call_captured_bss(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let value: i32 = sbox + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic: {e}"))?; + if value != CALL_COUNTER_BUMP { + return Err(format!( + "captured COUNTER expected {CALL_COUNTER_BUMP}, got {value}", + )); + } + Ok(()) +} + +/// Captured heap state restores exactly: the pinned `Vec` +/// pattern produced by `AllocAndWritePattern` survives across +/// save/load. +fn call_captured_heap_pattern(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let got: Vec = sbox + .call("ReadPattern", ()) + .map_err(|e| format!("ReadPattern: {e}"))?; + let expected: Vec = (0..HEAP_PATTERN_LEN as usize) + .map(|i| (i & 0xff) as u8) + .collect(); + if got != expected { + return Err(format!( + "captured heap pattern mismatch (got len {} expected len {})", + got.len(), + expected.len(), + )); + } + Ok(()) +} + +/// Guest-call wire format for every primitive parameter and return +/// type. Each loop asserts an `EchoT` round-trips. Float NaN goes +/// through `is_nan` since `NaN != NaN`. +fn call_guest_types_round_trip(sbox: &mut MultiUseSandbox) -> Result<(), String> { + macro_rules! 
echo { + ($name:expr, $ty:ty, $values:expr) => {{ + for &v in $values.iter() { + let got: $ty = sbox + .call($name, v) + .map_err(|e| format!("{}({:?}): {e}", $name, v))?; + if got != v { + return Err(format!("{}({:?}) returned {:?}", $name, v, got)); + } + } + }}; + } + echo!("EchoI32", i32, [i32::MIN, -1, 0, 1, i32::MAX]); + echo!("EchoU32", u32, [0u32, 1, u32::MAX]); + echo!("EchoI64", i64, [i64::MIN, -1, 0, 1, i64::MAX]); + echo!("EchoU64", u64, [0u64, 1, u64::MAX]); + echo!( + "EchoFloat", + f32, + [ + 0.0f32, + -1.5, + 1.5, + f32::MIN, + f32::MAX, + f32::INFINITY, + f32::NEG_INFINITY, + ] + ); + let got: f32 = sbox + .call("EchoFloat", f32::NAN) + .map_err(|e| format!("EchoFloat(NaN): {e}"))?; + if !got.is_nan() { + return Err(format!("EchoFloat(NaN) returned {got}")); + } + echo!( + "EchoDouble", + f64, + [ + 0.0f64, + -1.5, + 1.5, + f64::MIN, + f64::MAX, + f64::INFINITY, + f64::NEG_INFINITY, + ] + ); + let got: f64 = sbox + .call("EchoDouble", f64::NAN) + .map_err(|e| format!("EchoDouble(NaN): {e}"))?; + if !got.is_nan() { + return Err(format!("EchoDouble(NaN) returned {got}")); + } + echo!("EchoBool", bool, [false, true]); + + for v in [String::new(), "hello".to_string(), "héllo 🌍".to_string()] { + let got: String = sbox + .call("Echo", v.clone()) + .map_err(|e| format!("Echo({v:?}): {e}"))?; + if got != v { + return Err(format!("Echo({v:?}) returned {got:?}")); + } + } + for v in [ + Vec::::new(), + vec![0u8, 1, 2, 3, 0xff], + (0..256u32).map(|i| (i & 0xff) as u8).collect::>(), + ] { + let got: Vec = sbox + .call("GetSizePrefixedBuffer", v.clone()) + .map_err(|e| format!("GetSizePrefixedBuffer(len={}): {e}", v.len()))?; + if got != v { + return Err(format!( + "GetSizePrefixedBuffer(len={}) did not round-trip", + v.len(), + )); + } + } + let _: () = sbox.call("NoOp", ()).map_err(|e| format!("NoOp: {e}"))?; + let mixed: i32 = sbox + .call( + "PrintElevenArgs", + ( + "a".to_string(), + 1i32, + 2i64, + "b".to_string(), + "c".to_string(), + true, + false, + 
3u32, + 4u64, + 5i32, + 6.5f32, + ), + ) + .map_err(|e| format!("PrintElevenArgs: {e}"))?; + if mixed < 0 { + return Err(format!("PrintElevenArgs returned {mixed}")); + } + Ok(()) +} + +/// Host-call wire format for every primitive parameter and return +/// type. Each `RoundTripHostT` invokes the matching `HostEchoT` on +/// the registered host-fn set. +fn call_host_round_trips(sbox: &mut MultiUseSandbox) -> Result<(), String> { + macro_rules! rt { + ($name:expr, $ty:ty, $value:expr) => {{ + let v: $ty = $value; + let got: $ty = sbox + .call($name, v.clone()) + .map_err(|e| format!("{}({:?}): {e}", $name, v))?; + if got != v { + return Err(format!("{}({:?}) returned {:?}", $name, v, got)); + } + }}; + } + rt!("RoundTripHostI32", i32, -7); + rt!("RoundTripHostU32", u32, 0xdead_beef); + rt!("RoundTripHostI64", i64, i64::MIN); + rt!("RoundTripHostU64", u64, u64::MAX); + rt!("RoundTripHostF32", f32, -1.25); + rt!("RoundTripHostF64", f64, 1234.5); + rt!("RoundTripHostBool", bool, false); + rt!("RoundTripHostString", String, "round-trip".to_string()); + rt!("RoundTripHostVecBytes", Vec, vec![0u8, 1, 2, 3, 0xff]); + Ok(()) +} + +/// Snapshot-from-loaded-snapshot path. Mutates state on the loaded +/// call golden, takes a fresh snapshot, round-trips it through an +/// OCI layout on disk, and asserts the mutation survives. 
+fn call_chained_snapshot(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let val: i32 = sbox + .call("AddToStatic", 5i32) + .map_err(|e| format!("AddToStatic: {e}"))?; + if val != CALL_COUNTER_BUMP + 5 { + return Err(format!( + "AddToStatic returned {val}, expected {}", + CALL_COUNTER_BUMP + 5, + )); + } + let snap = sbox + .snapshot() + .map_err(|e| format!("take chained snapshot: {e}"))?; + + let tmp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?; + let layout = tmp.path().join("chained"); + let tag = "chained"; + snap.to_oci(&layout, tag) + .map_err(|e| format!("to_oci: {e}"))?; + + let loaded = Snapshot::from_oci(&layout, tag).map_err(|e| format!("from_oci: {e}"))?; + let mut funcs = HostFunctions::default(); + register_host_echo_fns(&mut funcs); + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None) + .map_err(|e| format!("from_snapshot: {e}"))?; + let val: i32 = sbox2 + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic on chained: {e}"))?; + if val != CALL_COUNTER_BUMP + 5 { + return Err(format!( + "chained snapshot observed COUNTER={val}, expected {}", + CALL_COUNTER_BUMP + 5, + )); + } + Ok(()) +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs new file mode 100644 index 000000000..0755d0718 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs @@ -0,0 +1,140 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Canonical fixture builders. These define exactly what bytes a +//! goldens push contains. Any change here is a snapshot content +//! change and requires a goldens regen. + +use std::sync::Arc; + +use hyperlight_host::func::Registerable; +use hyperlight_host::sandbox::SandboxConfiguration; +use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::sandbox::uninitialized::GuestEnvironment; +use hyperlight_host::{GuestBinary, MultiUseSandbox, UninitializedSandbox}; +use hyperlight_testing::simple_guest_as_string; + +/// Init data bytes baked into the init golden. Loaded back via +/// `ReadFromUserMemory` to assert byte-for-byte round-trip. +pub const INIT_DATA: &[u8] = b"hyperlight-snapshot-golden-init-data\0"; + +/// Heap pattern length used by the call golden. Small enough to +/// stay cheap, large enough to exercise non-trivial heap state. +pub const HEAP_PATTERN_LEN: u64 = 1024; + +/// Value the captured `COUNTER` static must hold in the call +/// golden. Set by `AddToStatic(CALL_COUNTER_BUMP)` at generate +/// time. +pub const CALL_COUNTER_BUMP: i32 = 42; + +/// Canonical `SandboxConfiguration` used to produce the goldens. +/// Layout knobs are deliberately bumped away from defaults so any +/// silent arithmetic change in `SandboxMemoryLayout::new` shifts at +/// least one region between generate-time and load-time. 
+fn golden_config() -> SandboxConfiguration { + let mut cfg = SandboxConfiguration::default(); + cfg.set_input_data_size(64 * 1024); + cfg.set_output_data_size(64 * 1024); + cfg.set_heap_size(256 * 1024); + cfg.set_scratch_size(512 * 1024); + cfg +} + +fn simpleguest_path() -> String { + simple_guest_as_string().expect("simpleguest_path") +} + +pub fn generate(kind: crate::platform::Kind) -> Arc { + match kind { + crate::platform::Kind::Init => generate_init(), + crate::platform::Kind::Call => generate_call(), + } +} + +pub fn generate_init() -> Arc { + let env = GuestEnvironment::new(GuestBinary::FilePath(simpleguest_path()), Some(INIT_DATA)); + Arc::new(Snapshot::from_env(env, golden_config()).expect("Snapshot::from_env (init)")) +} + +pub fn generate_call() -> Arc { + let mut u = UninitializedSandbox::new( + GuestBinary::FilePath(simpleguest_path()), + Some(golden_config()), + ) + .expect("UninitializedSandbox::new"); + register_host_echo_fns(&mut u); + let mut sbox = u.evolve().expect("evolve"); + run_canonical_calls(&mut sbox); + sbox.snapshot().expect("snapshot") +} + +/// Deterministic sequence of guest calls that mutate captured state +/// before snapshotting. Each call lands a specific bit of state +/// (BSS, heap, host-call wiring) that one of the per-surface +/// checks then asserts on after the golden is loaded. +fn run_canonical_calls(sbox: &mut MultiUseSandbox) { + let bumped: i32 = sbox + .call("AddToStatic", CALL_COUNTER_BUMP) + .expect("AddToStatic"); + assert_eq!(bumped, CALL_COUNTER_BUMP); + + let _: () = sbox + .call("AllocAndWritePattern", HEAP_PATTERN_LEN) + .expect("AllocAndWritePattern"); + + // Drive every host fn once so the captured host_function_details + // blob has known signatures, and any regression in host-dispatch + // surfaces at generate time rather than only during golden load. 
+ let _: i32 = sbox.call("RoundTripHostI32", 1234i32).expect("RTH i32"); + let _: u32 = sbox.call("RoundTripHostU32", 4321u32).expect("RTH u32"); + let _: i64 = sbox.call("RoundTripHostI64", -42i64).expect("RTH i64"); + let _: u64 = sbox.call("RoundTripHostU64", 1u64 << 40).expect("RTH u64"); + let _: f32 = sbox.call("RoundTripHostF32", 3.5f32).expect("RTH f32"); + let _: f64 = sbox.call("RoundTripHostF64", -2.25f64).expect("RTH f64"); + let _: bool = sbox.call("RoundTripHostBool", true).expect("RTH bool"); + let _: String = sbox + .call("RoundTripHostString", "hi".to_string()) + .expect("RTH string"); + let _: Vec = sbox + .call("RoundTripHostVecBytes", vec![1u8, 2, 3]) + .expect("RTH vec"); +} + +/// Register the `HostEcho*` family used by the call golden. Same +/// helper is used both at generate time (against +/// `UninitializedSandbox`) and at load time (against +/// `HostFunctions`) so the registered set matches the captured +/// `host_function_details`. +pub fn register_host_echo_fns(r: &mut R) { + r.register_host_function("HostEchoI32", |v: i32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoU32", |v: u32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoI64", |v: i64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoU64", |v: u64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoF32", |v: f32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoF64", |v: f64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoBool", |v: bool| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoString", |v: String| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoVecBytes", |v: Vec| Ok(v)) + .unwrap(); +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/main.rs b/src/hyperlight_host/tests/snapshot_goldens/main.rs new file mode 100644 index 000000000..c0c720bab --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/main.rs @@ -0,0 +1,123 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Snapshot goldens custom-harness test binary. +//! +//! Default mode runs the libtest-mimic harness with one trial per +//! row in `checks::CHECKS`, loading each kind's golden from +//! `target/snapshot-goldens-cache/{version}/{tag}/`. The +//! `generate [out-dir]` subcommand writes the canonical snapshots +//! for the local platform as OCI Image Layouts under `out-dir`, +//! defaulting to the verify cache for a local round-trip. +//! +//! Populate the cache with `just snapshot-goldens-pull` or +//! `just snapshot-goldens-generate`. Set `HYPERLIGHT_GOLDENS_HV` +//! to force the hypervisor name when more than one is available. 
+ +use std::path::{Path, PathBuf}; +use std::process::ExitCode; +use std::sync::Arc; + +use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::{HostFunctions, MultiUseSandbox}; +use libtest_mimic::{Arguments, Failed, Trial}; + +mod checks; +mod fixtures; +mod oci; +mod platform; + +use checks::Check; +use platform::{Kind, Platform}; + +fn main() -> ExitCode { + let mut argv = std::env::args().skip(1); + if argv.next().as_deref() == Some("generate") { + let out = argv + .next() + .map(PathBuf::from) + .unwrap_or_else(oci::cache_root); + return run_generate(&out); + } + run_verify() +} + +fn run_verify() -> ExitCode { + let args = Arguments::from_args(); + let Some(platform) = Platform::detect() else { + eprintln!( + "snapshot goldens: skipping verify: no (hypervisor, cpu, profile) platform detected on this host", + ); + return ExitCode::SUCCESS; + }; + println!( + "snapshot goldens: verifying platform={} version={}", + platform.suffix(), + platform::GOLDENS_VERSION, + ); + let trials = checks::CHECKS.iter().map(|c| trial(&platform, c)).collect(); + libtest_mimic::run(&args, trials).exit_code() +} + +fn trial(platform: &Platform, check: &'static Check) -> Trial { + let tag = platform.tag(check.kind); + Trial::test(check.name, move || { + let dir = oci::golden_dir(&tag).map_err(Failed::from)?; + let mut sbox = load_sandbox(&dir, &tag, check.kind).map_err(Failed::from)?; + (check.run)(&mut sbox).map_err(Failed::from) + }) +} + +fn load_sandbox(golden_dir: &Path, tag: &str, kind: Kind) -> Result { + let snap = Snapshot::from_oci(golden_dir, tag) + .map_err(|e| format!("Snapshot::from_oci({tag}): {e}"))?; + let mut funcs = HostFunctions::default(); + if matches!(kind, Kind::Call) { + fixtures::register_host_echo_fns(&mut funcs); + } + MultiUseSandbox::from_snapshot(Arc::new(snap), funcs, None) + .map_err(|e| format!("MultiUseSandbox::from_snapshot({tag}): {e}")) +} + +fn run_generate(out_dir: &Path) -> ExitCode { + let Some(platform) = Platform::detect() 
else { + eprintln!( + "snapshot goldens: generate: no (hypervisor, cpu, profile) platform detected on this host", + ); + return ExitCode::FAILURE; + }; + if let Err(e) = std::fs::create_dir_all(out_dir) { + eprintln!("snapshot goldens: generate: create {out_dir:?}: {e}"); + return ExitCode::FAILURE; + } + println!( + "snapshot goldens: generating platform={} version={} into {}", + platform.suffix(), + platform::GOLDENS_VERSION, + out_dir.display(), + ); + for kind in [Kind::Init, Kind::Call] { + let tag = platform.tag(kind); + let dir = out_dir.join(&tag); + let snap = fixtures::generate(kind); + if let Err(e) = snap.to_oci(&dir, &tag) { + eprintln!("snapshot goldens: generate: to_oci({tag}): {e}"); + return ExitCode::FAILURE; + } + println!(" wrote {tag} -> {}", dir.display()); + } + ExitCode::SUCCESS +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/oci.rs b/src/hyperlight_host/tests/snapshot_goldens/oci.rs new file mode 100644 index 000000000..de378b8c1 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/oci.rs @@ -0,0 +1,54 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! On-disk cache lookup for golden OCI Image Layouts. + +use std::path::PathBuf; + +use crate::platform::GOLDENS_VERSION; + +pub fn cache_root() -> PathBuf { + // Workspace target dir is two levels up from this crate. 
+ let target = std::env::var_os("CARGO_TARGET_DIR") + .map(PathBuf::from) + .unwrap_or_else(|| { + let raw = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("target"); + std::fs::canonicalize(&raw).unwrap_or(raw) + }); + target.join("snapshot-goldens-cache").join(GOLDENS_VERSION) +} + +fn cache_dir_for(tag: &str) -> PathBuf { + cache_root().join(tag) +} + +/// Locate the golden OCI Image Layout for `tag` in the local +/// cache. A missing layout is an error with guidance to populate +/// the cache. +pub fn golden_dir(tag: &str) -> Result { + let dir = cache_dir_for(tag); + if dir.join("oci-layout").is_file() { + return Ok(dir); + } + Err(format!( + "no golden OCI layout found at {dir:?} for tag `{tag}`. \ + Run `just snapshot-goldens-pull` to fetch the published goldens, \ + or `just snapshot-goldens-generate` to regenerate them locally.", + )) +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/platform.rs b/src/hyperlight_host/tests/snapshot_goldens/platform.rs new file mode 100644 index 000000000..63a62ead5 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/platform.rs @@ -0,0 +1,188 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Local platform detection and tag naming for snapshot goldens. +//! +//! A snapshot is not portable across `(hypervisor, cpu vendor, +//! build profile)`. Each such triple gets its own set of tags, +//! 
named `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}-{kind}`. + +/// Goldens version. Bump in lockstep with any change that affects +/// snapshot bytes: ABI bump, media type bump, simpleguest changes, +/// layout arithmetic changes, captured-register changes, etc. +/// +/// The compile-time tripwire assertions in +/// `hyperlight_host::sandbox::snapshot::tripwires` (ABI version, +/// media types, OCI layout version, PEB layout, `OutBAction` +/// ports) pin the known ABI surface against this version's goldens. +pub const GOLDENS_VERSION: &str = "v1"; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Kind { + Init, + Call, +} + +impl Kind { + pub fn as_str(self) -> &'static str { + match self { + Self::Init => "init", + Self::Call => "call", + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Hypervisor { + Kvm, + Mshv, + Whp, +} + +impl Hypervisor { + fn as_str(self) -> &'static str { + match self { + Self::Kvm => "kvm", + Self::Mshv => "mshv", + Self::Whp => "whp", + } + } + + /// Detect the locally available hypervisor. Order matches the + /// host crate's preference: `/dev/mshv` over `/dev/kvm` on + /// Linux, WHP on Windows. `HYPERLIGHT_GOLDENS_HV` overrides on + /// hosts that have more than one available. 
+ fn detect() -> Option { + if let Some(v) = std::env::var_os("HYPERLIGHT_GOLDENS_HV") { + return match v.to_string_lossy().as_ref() { + "kvm" => Some(Self::Kvm), + "mshv" => Some(Self::Mshv), + "whp" => Some(Self::Whp), + _ => None, + }; + } + #[cfg(target_os = "linux")] + { + if std::path::Path::new("/dev/mshv").exists() { + return Some(Self::Mshv); + } + if std::path::Path::new("/dev/kvm").exists() { + return Some(Self::Kvm); + } + None + } + #[cfg(target_os = "windows")] + { + Some(Self::Whp) + } + #[cfg(not(any(target_os = "linux", target_os = "windows")))] + { + None + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum CpuVendor { + Intel, + Amd, +} + +impl CpuVendor { + fn as_str(self) -> &'static str { + match self { + Self::Intel => "intel", + Self::Amd => "amd", + } + } + + /// Detect the local CPU vendor via the `0` leaf of `cpuid`. + /// Returns `None` on non-`x86_64` targets or unknown vendor + /// strings. + fn detect() -> Option { + #[cfg(target_arch = "x86_64")] + { + // SAFETY: cpuid leaf 0 is always available on x86_64. 
+ let r = unsafe { core::arch::x86_64::__cpuid(0) }; + let mut bytes = [0u8; 12]; + bytes[0..4].copy_from_slice(&r.ebx.to_le_bytes()); + bytes[4..8].copy_from_slice(&r.edx.to_le_bytes()); + bytes[8..12].copy_from_slice(&r.ecx.to_le_bytes()); + match &bytes { + b"GenuineIntel" => Some(Self::Intel), + b"AuthenticAMD" => Some(Self::Amd), + _ => None, + } + } + #[cfg(not(target_arch = "x86_64"))] + { + None + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Profile { + Debug, + Release, +} + +impl Profile { + fn as_str(self) -> &'static str { + match self { + Self::Debug => "debug", + Self::Release => "release", + } + } + + fn detect() -> Self { + if cfg!(debug_assertions) { + Self::Debug + } else { + Self::Release + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct Platform { + hv: Hypervisor, + cpu: CpuVendor, + profile: Profile, +} + +impl Platform { + pub fn detect() -> Option { + Some(Self { + hv: Hypervisor::detect()?, + cpu: CpuVendor::detect()?, + profile: Profile::detect(), + }) + } + + pub fn suffix(&self) -> String { + format!( + "{}-{}-{}", + self.hv.as_str(), + self.cpu.as_str(), + self.profile.as_str(), + ) + } + + pub fn tag(&self, kind: Kind) -> String { + format!("{}-{}-{}", GOLDENS_VERSION, self.suffix(), kind.as_str()) + } +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index b6844a716..7d6f15aff 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -385,6 +385,149 @@ fn get_size_prefixed_buffer(data: Vec) -> Vec { data } +// --- Type-coverage echoes (used by snapshot golden tests F6) --- +// +// One guest function per primitive parameter/return type, so the +// guest-call wire format for each variant of `ParameterValue` / +// `ReturnValue` is exercised by a single round-trip call. 
+// +// Strings (`Echo`), `Vec` (`GetSizePrefixedBuffer`), `f32` +// (`EchoFloat`) and `f64` (`EchoDouble`) are already covered above. +// The `unit` return type is exercised by `NoOp` below. + +#[guest_function("EchoI32")] +fn echo_i32(v: i32) -> i32 { + v +} + +#[guest_function("EchoU32")] +fn echo_u32(v: u32) -> u32 { + v +} + +#[guest_function("EchoI64")] +fn echo_i64(v: i64) -> i64 { + v +} + +#[guest_function("EchoU64")] +fn echo_u64(v: u64) -> u64 { + v +} + +#[guest_function("EchoBool")] +fn echo_bool(v: bool) -> bool { + v +} + +#[guest_function("NoOp")] +fn no_op() {} + +// --- Host-fn round trips (used by snapshot golden tests F7) --- +// +// One host function per primitive type. The host registers each; +// the guest invokes each via a `RoundTripHostT` wrapper so the test +// can verify the value round-trips through the host. This exercises +// the persisted `HostFunctionDetails` flatbuffer schema for every +// primitive type (both as parameter and as return). + +#[host_function("HostEchoI32")] +fn host_echo_i32(v: i32) -> Result; + +#[host_function("HostEchoU32")] +fn host_echo_u32(v: u32) -> Result; + +#[host_function("HostEchoI64")] +fn host_echo_i64(v: i64) -> Result; + +#[host_function("HostEchoU64")] +fn host_echo_u64(v: u64) -> Result; + +#[host_function("HostEchoF32")] +fn host_echo_f32(v: f32) -> Result; + +#[host_function("HostEchoF64")] +fn host_echo_f64(v: f64) -> Result; + +#[host_function("HostEchoBool")] +fn host_echo_bool(v: bool) -> Result; + +#[host_function("HostEchoString")] +fn host_echo_string(v: String) -> Result; + +#[host_function("HostEchoVecBytes")] +fn host_echo_vec_bytes(v: Vec) -> Result>; + +#[guest_function("RoundTripHostI32")] +fn round_trip_host_i32(v: i32) -> Result { + host_echo_i32(v) +} + +#[guest_function("RoundTripHostU32")] +fn round_trip_host_u32(v: u32) -> Result { + host_echo_u32(v) +} + +#[guest_function("RoundTripHostI64")] +fn round_trip_host_i64(v: i64) -> Result { + host_echo_i64(v) +} + 
+#[guest_function("RoundTripHostU64")] +fn round_trip_host_u64(v: u64) -> Result { + host_echo_u64(v) +} + +#[guest_function("RoundTripHostF32")] +fn round_trip_host_f32(v: f32) -> Result { + host_echo_f32(v) +} + +#[guest_function("RoundTripHostF64")] +fn round_trip_host_f64(v: f64) -> Result { + host_echo_f64(v) +} + +#[guest_function("RoundTripHostBool")] +fn round_trip_host_bool(v: bool) -> Result { + host_echo_bool(v) +} + +#[guest_function("RoundTripHostString")] +fn round_trip_host_string(v: String) -> Result { + host_echo_string(v) +} + +#[guest_function("RoundTripHostVecBytes")] +fn round_trip_host_vec_bytes(v: Vec) -> Result> { + host_echo_vec_bytes(v) +} + +// --- Heap pattern (used by snapshot golden test F5) --- +// +// `AllocAndWritePattern(len)` allocates a `Vec` of length `len`, +// writes a deterministic byte pattern into it, and pins it in a +// static so the heap allocation survives the snapshot. +// `ReadPattern()` returns whatever is currently pinned. The test +// snapshots between the two calls and asserts the bytes round-trip +// across the on-disk save/load. + +static mut HEAP_PATTERN: Option> = None; + +#[guest_function("AllocAndWritePattern")] +fn alloc_and_write_pattern(len: u64) { + let v: Vec = (0..len as usize).map(|i| (i & 0xff) as u8).collect(); + unsafe { HEAP_PATTERN = Some(v) }; +} + +#[guest_function("ReadPattern")] +fn read_pattern() -> Vec { + #[allow(static_mut_refs)] + unsafe { + HEAP_PATTERN.clone().unwrap_or_default() + } +} + #[expect( clippy::empty_loop, reason = "This function is used to keep the CPU busy"