diff --git a/cmd/containerd-shim-lcow-v1/specs/boot.go b/cmd/containerd-shim-lcow-v1/specs/boot.go new file mode 100644 index 0000000000..74dc6c5e1e --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/specs/boot.go @@ -0,0 +1,177 @@ +//go:build windows + +package specs + +import ( + "context" + "fmt" + "os" + "path/filepath" + + shimsandbox "github.com/Microsoft/hcsshim/api/sandbox/v1" + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + "github.com/Microsoft/hcsshim/osversion" + shimannotations "github.com/Microsoft/hcsshim/pkg/annotations" + + "github.com/sirupsen/logrus" +) + +// resolveBootFilesPath resolves and validates the boot files root path. +func resolveBootFilesPath(ctx context.Context, opts *runhcsoptions.Options, annotations map[string]string) (string, error) { + // If the customer provides the boot files path then it is given preference over the default path. + // Similarly, based on the existing behavior in old shim, the annotation provided boot files path + // is given preference over those in runhcs options. 
+ bootFilesRootPath := oci.ParseAnnotationsString(annotations, shimannotations.BootFilesRootPath, opts.BootFilesRootPath) + if bootFilesRootPath == "" { + bootFilesRootPath = vmutils.DefaultLCOWOSBootFilesPath() + } + + if p, err := filepath.Abs(bootFilesRootPath); err == nil { + bootFilesRootPath = p + } else { + log.G(ctx).WithFields(logrus.Fields{ + logfields.Path: bootFilesRootPath, + logrus.ErrorKey: err, + }).Warning("could not make boot files path absolute") + } + + if _, err := os.Stat(bootFilesRootPath); err != nil { + return "", fmt.Errorf("boot_files_root_path %q not found: %w", bootFilesRootPath, err) + } + + return bootFilesRootPath, nil +} + +// parseBootOptions parses LCOW boot options from annotations and options. +// Returns the BootOptions proto and the full rootfs path. +func parseBootOptions(ctx context.Context, opts *runhcsoptions.Options, annotations map[string]string) (*shimsandbox.BootOptions, string, error) { + log.G(ctx).Debug("parseBootOptions: starting boot options parsing") + + // Resolve and validate boot files path. + bootFilesPath, err := resolveBootFilesPath(ctx, opts, annotations) + if err != nil { + return nil, "", err + } + + log.G(ctx).WithField(logfields.Path, bootFilesPath).Debug("using boot files path") + + bootOptions := &shimsandbox.BootOptions{} + + // Set the default rootfs to initrd. + rootFsFile := vmutils.InitrdFile + + // Helper to check file existence in boot files path. + fileExists := func(filename string) bool { + _, err := os.Stat(filepath.Join(bootFilesPath, filename)) + return err == nil + } + + // Reset the default values based on the presence of files in the boot files path. + // We have a rootfs.vhd in the boot files path. 
Use it over an initrd.img + if fileExists(vmutils.VhdFile) { + rootFsFile = vmutils.VhdFile + log.G(ctx).WithField( + vmutils.VhdFile, filepath.Join(bootFilesPath, vmutils.VhdFile), + ).Debug("updated LCOW root filesystem to " + vmutils.VhdFile) + } + + // KernelDirect supports uncompressed kernel if the kernel is present. + // Default to uncompressed if on box. NOTE: If `kernel` is already + // uncompressed and simply named 'kernel' it will still be used + // uncompressed automatically. + kernelDirectBootSupported := osversion.Build() >= 18286 + useKernelDirect := oci.ParseAnnotationsBool(ctx, annotations, shimannotations.KernelDirectBoot, kernelDirectBootSupported) + + log.G(ctx).WithFields(logrus.Fields{ + "kernelDirectSupported": kernelDirectBootSupported, + "useKernelDirect": useKernelDirect, + }).Debug("determined boot mode") + + // If customer specifies kernel direct boot but the build does not support it, return an error. + if useKernelDirect && !kernelDirectBootSupported { + return nil, "", fmt.Errorf("KernelDirectBoot is not supported on builds older than 18286") + } + + // Determine kernel file based on boot mode + var kernelFileName string + if useKernelDirect { + // KernelDirect supports uncompressed kernel if present. 
+ if fileExists(vmutils.UncompressedKernelFile) { + kernelFileName = vmutils.UncompressedKernelFile + log.G(ctx).WithField(vmutils.UncompressedKernelFile, filepath.Join(bootFilesPath, vmutils.UncompressedKernelFile)).Debug("updated LCOW kernel file to " + vmutils.UncompressedKernelFile) + } else if fileExists(vmutils.KernelFile) { + kernelFileName = vmutils.KernelFile + } else { + return nil, "", fmt.Errorf("kernel file not found in boot files path for kernel direct boot") + } + } else { + kernelFileName = vmutils.KernelFile + if !fileExists(vmutils.KernelFile) { + return nil, "", fmt.Errorf("kernel file %q not found in boot files path: %w", vmutils.KernelFile, os.ErrNotExist) + } + } + + log.G(ctx).WithField("kernelFile", kernelFileName).Debug("selected kernel file") + + // Parse preferred rootfs type annotation. This overrides the default set above based on file presence. + if preferredRootfsType := oci.ParseAnnotationsString(annotations, shimannotations.PreferredRootFSType, ""); preferredRootfsType != "" { + log.G(ctx).WithField("preferredRootFSType", preferredRootfsType).Debug("applying preferred rootfs type override") + switch preferredRootfsType { + case "initrd": + rootFsFile = vmutils.InitrdFile + case "vhd": + rootFsFile = vmutils.VhdFile + default: + return nil, "", fmt.Errorf("invalid PreferredRootFSType: %s", preferredRootfsType) + } + if !fileExists(rootFsFile) { + return nil, "", fmt.Errorf("%q not found in boot files path", rootFsFile) + } + } + + log.G(ctx).WithField("rootFsFile", rootFsFile).Debug("selected rootfs file") + + // Get kernel boot options from annotations (will be incorporated into kernel cmd line later) + kernelBootOptions := oci.ParseAnnotationsString(annotations, shimannotations.KernelBootOptions, "") + + // Set up boot configuration based on boot mode + if useKernelDirect { + log.G(ctx).Debug("configuring kernel direct boot") + bootOptions.LinuxKernelDirect = &shimsandbox.LinuxKernelDirect{ + KernelFilePath: 
filepath.Join(bootFilesPath, kernelFileName), + // KernelCmdLine will be populated later by buildKernelArgs + } + if rootFsFile == vmutils.InitrdFile { + bootOptions.LinuxKernelDirect.InitRdPath = filepath.Join(bootFilesPath, rootFsFile) + log.G(ctx).WithField("initrdPath", bootOptions.LinuxKernelDirect.InitRdPath).Debug("configured initrd for kernel direct boot") + } + // Store kernel boot options temporarily in KernelCmdLine; will be appended to full args later + bootOptions.LinuxKernelDirect.KernelCmdLine = kernelBootOptions + } else { + // UEFI boot + log.G(ctx).Debug("configuring UEFI boot") + bootOptions.Uefi = &shimsandbox.UEFI{ + BootThis: &shimsandbox.UefiBootEntry{ + DevicePath: `\` + kernelFileName, + DeviceType: "VmbFs", + VmbFsRootPath: bootFilesPath, + // OptionalData will be populated later by buildKernelArgs + OptionalData: kernelBootOptions, + }, + } + } + + rootFsFullPath := filepath.Join(bootFilesPath, rootFsFile) + log.G(ctx).WithFields(logrus.Fields{ + "rootFsFullPath": rootFsFullPath, + "kernelFilePath": filepath.Join(bootFilesPath, kernelFileName), + "useKernelDirect": useKernelDirect, + "kernelBootOptions": kernelBootOptions, + }).Info("boot options configured successfully") + + return bootOptions, rootFsFullPath, nil +} diff --git a/cmd/containerd-shim-lcow-v1/specs/confidential.go b/cmd/containerd-shim-lcow-v1/specs/confidential.go new file mode 100644 index 0000000000..d2b39b54a3 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/specs/confidential.go @@ -0,0 +1,133 @@ +//go:build windows + +package specs + +import ( + "context" + "encoding/base64" + "fmt" + "os" + "path/filepath" + + shimsandbox "github.com/Microsoft/hcsshim/api/sandbox/v1" + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + shimannotations "github.com/Microsoft/hcsshim/pkg/annotations" + 
"github.com/Microsoft/hcsshim/pkg/securitypolicy" +) + +// parseConfidentialOptions parses LCOW confidential options from annotations. +// This should only be called for confidential scenarios. +func parseConfidentialOptions( + ctx context.Context, + opts *runhcsoptions.Options, + annotations map[string]string, + boot *shimsandbox.BootOptions, + mem *shimsandbox.MemoryConfig, +) (*shimsandbox.ConfidentialOptions, error) { + + log.G(ctx).Debug("parseConfidentialOptions: starting confidential options parsing") + + confidentialOptions := &shimsandbox.ConfidentialOptions{} + confidentialOptions.SecurityPolicy = oci.ParseAnnotationsString(annotations, shimannotations.LCOWSecurityPolicy, "") + confidentialOptions.SecurityPolicyEnforcer = oci.ParseAnnotationsString(annotations, shimannotations.LCOWSecurityPolicyEnforcer, "") + confidentialOptions.UvmReferenceInfoFile = oci.ParseAnnotationsString(annotations, shimannotations.LCOWReferenceInfoFile, vmutils.DefaultUVMReferenceInfoFile) + + // Resolve boot files path for confidential mode + bootFilesPath, err := resolveBootFilesPath(ctx, opts, annotations) + if err != nil { + return nil, fmt.Errorf("failed to resolve boot files path for confidential VM: %w", err) + } + + // Set the default GuestState filename. + // The kernel and minimal initrd are combined into a single vmgs file. 
+ guestStateFile := vmutils.DefaultGuestStateFile + + // Allow override from annotation + if annotationGuestStateFile := oci.ParseAnnotationsString(annotations, shimannotations.LCOWGuestStateFile, ""); annotationGuestStateFile != "" { + guestStateFile = annotationGuestStateFile + } + + // Validate the VMGS template file exists and save the full path. + // Any stat failure (not only "does not exist") is reported, so that an + // inaccessible file is rejected here rather than later during VM creation. + vmgsTemplatePath := filepath.Join(bootFilesPath, guestStateFile) + if _, err := os.Stat(vmgsTemplatePath); err != nil { + return nil, fmt.Errorf("the GuestState vmgs file '%s' was not found: %w", vmgsTemplatePath, err) + } + confidentialOptions.VmgsTemplatePath = vmgsTemplatePath + log.G(ctx).WithField("vmgsTemplatePath", vmgsTemplatePath).Debug("VMGS template path configured") + + // Set default DmVerity rootfs VHD. + // The root file system comes from the dmverity vhd file which is mounted by the initrd in the vmgs file. + dmVerityRootfsFile := vmutils.DefaultDmVerityRootfsVhd + + // Allow override from annotation + if annotationDmVerityRootFsVhd := oci.ParseAnnotationsString(annotations, shimannotations.DmVerityRootFsVhd, ""); annotationDmVerityRootFsVhd != "" { + dmVerityRootfsFile = annotationDmVerityRootFsVhd + } + + // Validate the DmVerity rootfs VHD file exists and save the full path. + // As above, every stat error is surfaced and wrapped for the caller. + dmVerityRootfsTemplatePath := filepath.Join(bootFilesPath, dmVerityRootfsFile) + if _, err := os.Stat(dmVerityRootfsTemplatePath); err != nil { + return nil, fmt.Errorf("the DM Verity VHD file '%s' was not found: %w", dmVerityRootfsTemplatePath, err) + } + confidentialOptions.DmVerityRootfsTemplatePath = dmVerityRootfsTemplatePath + log.G(ctx).WithField("dmVerityRootfsPath", dmVerityRootfsTemplatePath).Debug("DM Verity rootfs path configured") + + // Note: VPMem and vPCI assigned devices are already disabled in parseDeviceOptions + // when isConfidential is true. 
+ + // Required by HCS for the isolated boot scheme, see also https://docs.microsoft.com/en-us/windows-server/virtualization/hyper-v/learn-more/generation-2-virtual-machine-security-settings-for-hyper-v + // A complete explanation of the whys and wherefores of starting an encrypted, isolated VM is beyond the scope of these comments. + log.G(ctx).Debug("configuring UEFI secure boot for confidential VM") + boot.Uefi = &shimsandbox.UEFI{ + ApplySecureBootTemplate: "Apply", + // aka MicrosoftWindowsSecureBootTemplateGUID equivalent to "Microsoft Windows" template from Get-VMHost | select SecureBootTemplates + SecureBootTemplateID: "1734c6e8-3154-4dda-ba5f-a874cc483422", + } + // Clear any existing boot options to only have UEFI secure boot configuration. + boot.LinuxKernelDirect = nil + + // Set memory to physical backing (no overcommit) for confidential VMs + log.G(ctx).Debug("disabling memory overcommit for confidential VM") + mem.AllowOvercommit = false + + // Part of the protocol to ensure that the rules in the user's Security Policy are + // respected is to provide a hash of the policy to the hardware. This is immutable + // and can be used to check that the policy used by opengcs is the required one as + // a condition of releasing secrets to the container. + log.G(ctx).Debug("creating security policy digest") + policyDigest, err := securitypolicy.NewSecurityPolicyDigest(confidentialOptions.SecurityPolicy) + if err != nil { + return nil, fmt.Errorf("failed to create security policy digest: %w", err) + } + + // HCS API expects a base64 encoded string as LaunchData. Internally it + // decodes it to bytes. SEV later returns the decoded byte blob as HostData + // field of the report. + hostData := base64.StdEncoding.EncodeToString(policyDigest) + + // Put the measurement into the LaunchData field of the HCS creation command. + // This will end-up in HOST_DATA of SNP_LAUNCH_FINISH command and the ATTESTATION_REPORT + // retrieved by the guest later. 
+ confidentialOptions.SecuritySettings = &shimsandbox.SecuritySettings{ + EnableTpm: false, + Isolation: &shimsandbox.IsolationSettings{ + IsolationType: "SecureNestedPaging", + LaunchData: hostData, + // HclEnabled: true, /* Not available in schema 2.5 - REQUIRED when using BlockStorage in 2.6 */ + HclEnabled: oci.ParseAnnotationsNullableBool(ctx, annotations, shimannotations.LCOWHclEnabled), + }, + } + + // Set default UVM reference info file if not set + if confidentialOptions.UvmReferenceInfoFile == "" { + confidentialOptions.UvmReferenceInfoFile = vmutils.DefaultUVMReferenceInfoFile + } + + // Note: HvSocket service table for confidential VMs is configured in parseAdditionalOptions. + + log.G(ctx).Info("confidential options configured successfully") + return confidentialOptions, nil +} diff --git a/cmd/containerd-shim-lcow-v1/specs/devices.go b/cmd/containerd-shim-lcow-v1/specs/devices.go new file mode 100644 index 0000000000..8dfa8eba03 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/specs/devices.go @@ -0,0 +1,223 @@ +//go:build windows + +package specs + +import ( + "context" + "fmt" + "path/filepath" + "strings" + + shimsandbox "github.com/Microsoft/hcsshim/api/sandbox/v1" + "github.com/Microsoft/hcsshim/internal/devices" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + "github.com/Microsoft/hcsshim/osversion" + shimannotations "github.com/Microsoft/hcsshim/pkg/annotations" + + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +// parseDeviceOptions parses device options from annotations and assigned devices. +// isConfidential indicates if this is a confidential scenario, which affects VPMem and PCI device configuration. 
+// numaConfig is used to determine if NUMA affinity propagation should be enabled for vPCI devices. +func parseDeviceOptions( + ctx context.Context, + annotations map[string]string, + devices []specs.WindowsDevice, + mem *shimsandbox.MemoryConfig, + rootFsFullPath string, + isConfidential bool, + numaConfig *shimsandbox.NUMAConfig, +) (*shimsandbox.DeviceOptions, error) { + + log.G(ctx).WithFields(logrus.Fields{ + "deviceCount": len(devices), + "isConfidential": isConfidential, + "rootFsPath": rootFsFullPath, + }).Debug("parseDeviceOptions: starting device options parsing") + + // ===============================Parse VPMem configuration=============================== + vpmemCount := oci.ParseAnnotationsUint32(ctx, annotations, shimannotations.VPMemCount, vmutils.DefaultVPMEMCount) + vpmemSize := oci.ParseAnnotationsUint64(ctx, annotations, shimannotations.VPMemSize, vmutils.DefaultVPMemSizeBytes) + + // Multi-mapping is enabled by default on 19H1+, can be disabled via annotation. + vpmemNoMultiMapping := oci.ParseAnnotationsBool(ctx, annotations, shimannotations.VPMemNoMultiMapping, osversion.Build() < osversion.V19H1) + vpmemMultiMapping := !vpmemNoMultiMapping + + // VPMem is not supported by the enlightened kernel for SNP (confidential VMs). + if mem.FullyPhysicallyBacked || isConfidential { + vpmemCount = 0 + } + + if vpmemCount > vmutils.MaxVPMEMCount { + return nil, fmt.Errorf("vp_mem_device_count cannot be greater than %d", vmutils.MaxVPMEMCount) + } + + if vpmemCount > 0 && vpmemSize%4096 != 0 { + return nil, fmt.Errorf("vp_mem_size_bytes must be a multiple of 4096") + } + + log.G(ctx).WithFields(logrus.Fields{ + "vpmemCount": vpmemCount, + "vpmemSizeBytes": vpmemSize, + "vpmemMultiMapping": vpmemMultiMapping, + }).Debug("parsed VPMem configuration") + + // Extract the rootfs file name. 
+ rootFsFile := filepath.Base(rootFsFullPath) + + // Create VPMem controller configuration + var vpMemController *shimsandbox.VirtualPMemController + if vpmemCount > 0 && rootFsFile == vmutils.VhdFile { + // If booting from VHD via VPMem, configure the VPMem device for rootfs + vpMemController = &shimsandbox.VirtualPMemController{ + MaximumCount: vpmemCount, + MaximumSizeBytes: vpmemSize, + MultiMapping: vpmemMultiMapping, + Devices: make(map[string]*shimsandbox.VirtualPMemDevice), + } + + // Determine image format based on file extension. filepath.Ext returns the + // extension including its leading dot, so the comparison must be against ".vhdx". + imageFormat := "Vhd1" + if strings.EqualFold(filepath.Ext(rootFsFile), ".vhdx") { + imageFormat = "Vhdx" + } + + // Add rootfs VHD as VPMem device 0 + vpMemController.Devices["0"] = &shimsandbox.VirtualPMemDevice{ + HostPath: rootFsFullPath, + ReadOnly: true, + ImageFormat: imageFormat, + UvmPath: "/", + } + + log.G(ctx).WithFields(logrus.Fields{ + "device": "0", + "path": rootFsFullPath, + "imageFormat": imageFormat, + "multiMapping": vpmemMultiMapping, + }).Debug("configured VPMem device for VHD rootfs boot") + } + + // ===============================Parse SCSI configuration=============================== + scsiControllerCount := uint32(1) + // If vpmemCount is 0, it means we are going to need multiple SCSI controllers + // to support lots of layers. 
+ if osversion.Build() >= osversion.RS5 && vpmemCount == 0 { + scsiControllerCount = uint32(len(guestrequest.ScsiControllerGuids)) + } + + log.G(ctx).WithField("scsiControllerCount", scsiControllerCount).Debug("configuring SCSI controllers") + + // Initialize SCSI controllers map with empty controllers + scsiControllers := make(map[string]*shimsandbox.ScsiController) + for i := uint32(0); i < scsiControllerCount; i++ { + controllerGuid := guestrequest.ScsiControllerGuids[i] + scsiControllers[controllerGuid] = &shimsandbox.ScsiController{ + Attachments: make(map[string]*shimsandbox.ScsiAttachment), + } + } + + // If booting from VHD via SCSI (no VPMem), attach the rootfs VHD to SCSI controller 0, LUN 0 + // For confidential Containers, rootFSFile will be DmVerityRootfsPath. + if vpmemCount == 0 && rootFsFile == vmutils.VhdFile { + scsiControllers[guestrequest.ScsiControllerGuids[0]].Attachments["0"] = &shimsandbox.ScsiAttachment{ + Type: "VirtualDisk", + Path: rootFsFullPath, + ReadOnly: true, + } + log.G(ctx).WithFields(logrus.Fields{ + "controller": guestrequest.ScsiControllerGuids[0], + "lun": "0", + "path": rootFsFullPath, + }).Debug("configured SCSI attachment for VHD rootfs boot") + } + + // ===============================Parse VPCI Devices configuration=============================== + // For confidential VMs, vPCI assigned devices are not supported + var devicesToAdd []*shimsandbox.VPciDevice + if !isConfidential { + log.G(ctx).Debug("parsing vPCI device assignments") + // deviceKey is used to uniquely identify a device for duplicate detection. + type deviceKey struct { + instanceID string + functionIndex uint32 + } + + // Use a map to track seen devices and avoid duplicates. + seen := make(map[deviceKey]struct{}) + devicesToAdd = make([]*shimsandbox.VPciDevice, 0, len(devices)) + + // Determine if NUMA affinity propagation should be enabled. + // Only applicable on builds >= V25H1Server with NUMA-enabled VMs. 
+ var propagateAffinity *bool + if osversion.Get().Build >= osversion.V25H1Server { + numaEnabled := numaConfig != nil && (numaConfig.Numa != nil || numaConfig.NumaProcessors != nil) + if numaEnabled { + t := true + propagateAffinity = &t + log.G(ctx).Debug("NUMA affinity propagation enabled for vPCI devices") + } + } + + for _, dev := range devices { + if d := getVPCIDevice(ctx, dev); d != nil { + key := deviceKey{instanceID: d.DeviceInstanceID, functionIndex: d.VirtualFunctionIndex} + if _, exists := seen[key]; exists { + return nil, fmt.Errorf("device %s with index %d is specified multiple times", d.DeviceInstanceID, d.VirtualFunctionIndex) + } + seen[key] = struct{}{} + + // Generate a unique VMBus GUID for each vPCI device. + vmbusGUID, err := guid.NewV4() + if err != nil { + return nil, fmt.Errorf("failed to generate vmbus GUID for device %s: %w", d.DeviceInstanceID, err) + } + d.VmbusGUID = vmbusGUID.String() + d.PropagateNumaAffinity = propagateAffinity + + log.G(ctx).WithFields(logrus.Fields{ + "deviceInstanceID": d.DeviceInstanceID, + "virtualFunctionIndex": d.VirtualFunctionIndex, + "vmbusGUID": d.VmbusGUID, + }).Debug("configured vPCI device") + + devicesToAdd = append(devicesToAdd, d) + } + } + } + + deviceOptions := &shimsandbox.DeviceOptions{ + VpMemController: vpMemController, + ScsiControllers: scsiControllers, + VpciDevices: devicesToAdd, + Plan9: &shimsandbox.Plan9{ + Shares: []*shimsandbox.Plan9Share{}, + }, + } + + log.G(ctx).Info("device options configured successfully") + return deviceOptions, nil +} + +// getVPCIDevice maps a WindowsDevice into the sandbox vPCIDevice format when supported. 
+func getVPCIDevice(ctx context.Context, dev specs.WindowsDevice) *shimsandbox.VPciDevice { + pciID, index := devices.GetDeviceInfoFromPath(dev.ID) + if vmutils.IsValidDeviceType(dev.IDType) { + return &shimsandbox.VPciDevice{ + DeviceInstanceID: pciID, + VirtualFunctionIndex: uint32(index), + } + } + + log.G(ctx).WithFields(logrus.Fields{ + "device": dev, + }).Warnf("device type %s invalid, skipping", dev.IDType) + + return nil +} diff --git a/cmd/containerd-shim-lcow-v1/specs/doc.go b/cmd/containerd-shim-lcow-v1/specs/doc.go new file mode 100644 index 0000000000..99f2bf8807 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/specs/doc.go @@ -0,0 +1,32 @@ +// Package specs encapsulates the business logic to parse annotations, devices, +// and runhcs options into an api/sandbox/v1.Spec which will be used by the shim to +// create UVMs (Utility VMs) via the Host Compute Service (HCS). +// +// The primary entry point is GenerateSandboxSpecs, which takes containerd runtime +// options, OCI annotations, and device assignments, and produces an api/sandbox/v1.Spec +// that contains all the configuration needed to create an LCOW (Linux Containers on +// Windows) sandbox environment. +// +// # Sandbox Specification Components +// +// The package handles parsing and validation of multiple configuration areas: +// +// - Boot Configuration: Kernel, initrd, root filesystem, and boot file paths +// - CPU Configuration: Processor count, limits, and NUMA topology +// - Memory Configuration: Memory size, MMIO gaps, and memory affinity +// - Device Configuration: VPMem devices, vPCI devices, and SCSI controllers +// - Storage Configuration: Storage QoS settings +// - Confidential Computing: Security policies, SNP settings, and encryption +// - Kernel Arguments: Command line parameters derived from all configuration sources +// +// # Annotation Support +// +// The package extensively uses OCI annotations to allow fine-grained control over +// UVM creation. 
Annotations can override default behaviors or provide additional +// configuration not available through standard containerd options. +// +// # Platform Support +// +// The package supports both AMD64 and ARM64 Linux platforms running on Windows +// hosts, with platform-specific handling where needed. +package specs diff --git a/cmd/containerd-shim-lcow-v1/specs/kernel_args.go b/cmd/containerd-shim-lcow-v1/specs/kernel_args.go new file mode 100644 index 0000000000..d2ef79c294 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/specs/kernel_args.go @@ -0,0 +1,233 @@ +//go:build windows + +package specs + +import ( + "context" + "fmt" + "strings" + + shimsandbox "github.com/Microsoft/hcsshim/api/sandbox/v1" + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + iannotations "github.com/Microsoft/hcsshim/internal/annotations" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + shimannotations "github.com/Microsoft/hcsshim/pkg/annotations" +) + +// buildKernelArgs constructs the kernel command line from the parsed config structs. 
+func buildKernelArgs( + ctx context.Context, + opts *runhcsoptions.Options, + annotations map[string]string, + boot *shimsandbox.BootOptions, + rootFsFile string, + cpu *shimsandbox.CPUConfig, + device *shimsandbox.DeviceOptions, + additional *shimsandbox.AdditionalConfig, +) (string, error) { + + log.G(ctx).WithField("rootFsFile", rootFsFile).Debug("buildKernelArgs: starting kernel arguments construction") + + // Parse intermediate values from annotations that are only used for kernel args + vpciEnabled := oci.ParseAnnotationsBool(ctx, annotations, shimannotations.VPCIEnabled, false) + disableTimeSyncService := oci.ParseAnnotationsBool(ctx, annotations, shimannotations.DisableLCOWTimeSyncService, false) + writableOverlayDirs := oci.ParseAnnotationsBool(ctx, annotations, iannotations.WritableOverlayDirs, false) + processDumpLocation := oci.ParseAnnotationsString(annotations, shimannotations.ContainerProcessDumpLocation, "") + + // Build kernel arguments in logical sections for better readability. + var args []string + + // 1. Root filesystem configuration. + rootfsArgs, err := buildRootfsArgs(ctx, annotations, rootFsFile, boot, device) + if err != nil { + return "", err + } + if rootfsArgs != "" { + args = append(args, rootfsArgs) + } + + // 2. Vsock transport configuration + // Explicitly disable virtio_vsock_init to ensure we use hv_sock transport. + // For kernels built without virtio-vsock this is a no-op. + args = append(args, "initcall_blacklist=virtio_vsock_init") + + // 3. Console and debugging configuration + vmDebugging := additional.ConsolePipe != "" || additional.EnableGraphicsConsole + args = append(args, buildConsoleArgs(additional)...) + + if !vmDebugging { + // Terminate the VM if there is a kernel panic. + args = append(args, "panic=-1", "quiet") + } + + // 4. 
User-provided kernel boot options from annotations + // parseBootOptions stages these options in LinuxKernelDirect.KernelCmdLine for + // kernel direct boot, or in the UEFI boot entry's OptionalData for UEFI boot. + // Read whichever one is set so the options are not dropped in UEFI mode. + // BootThis is nil-checked because confidential VMs set Uefi without a boot entry. + switch { + case boot.LinuxKernelDirect != nil: + if boot.LinuxKernelDirect.KernelCmdLine != "" { + args = append(args, boot.LinuxKernelDirect.KernelCmdLine) + } + case boot.Uefi != nil && boot.Uefi.BootThis != nil && boot.Uefi.BootThis.OptionalData != "": + args = append(args, boot.Uefi.BootThis.OptionalData) + } + + // 5. PCI configuration + if !vpciEnabled { + args = append(args, "pci=off") + } + + // 6. CPU configuration + args = append(args, fmt.Sprintf("nr_cpus=%d", cpu.ProcessorCount)) + + // 7. Miscellaneous kernel parameters + // brd.rd_nr=0 disables ramdisk, pmtmr=0 disables ACPI PM timer + args = append(args, "brd.rd_nr=0", "pmtmr=0") + + // 8. Init arguments (passed after "--" separator) + initArgs := buildInitArgs(ctx, opts, writableOverlayDirs, disableTimeSyncService, processDumpLocation, rootFsFile, vmDebugging) + args = append(args, "--", initArgs) + + return strings.Join(args, " "), nil +} + +// buildRootfsArgs constructs kernel arguments for root filesystem configuration. +func buildRootfsArgs( + ctx context.Context, + annotations map[string]string, + rootFsFile string, + boot *shimsandbox.BootOptions, + device *shimsandbox.DeviceOptions, +) (string, error) { + + isInitrd := rootFsFile == vmutils.InitrdFile + isVHD := rootFsFile == vmutils.VhdFile + kernelDirect := boot.LinuxKernelDirect != nil + + // InitRd boot (applicable only for UEFI mode - kernel direct handles initrd via InitRdPath) + if isInitrd && !kernelDirect { + return "initrd=/" + rootFsFile, nil + } + + // VHD boot + if isVHD { + // VPMem VHD(X) booting. + if device.VpMemController != nil && device.VpMemController.MaximumCount > 0 { + return "root=/dev/pmem0 ro rootwait init=/init", nil + } + + // SCSI VHD booting with dm-verity. 
+ dmVerityMode := oci.ParseAnnotationsBool(ctx, annotations, shimannotations.DmVerityMode, false) + if dmVerityMode { + dmVerityCreateArgs := oci.ParseAnnotationsString(annotations, shimannotations.DmVerityCreateArgs, "") + if len(dmVerityCreateArgs) == 0 { + return "", fmt.Errorf("DmVerityCreateArgs should be set when DmVerityMode is true and not booting from a vmgs file") + } + return fmt.Sprintf("root=/dev/dm-0 dm-mod.create=%q init=/init", dmVerityCreateArgs), nil + } + + return "root=/dev/sda ro rootwait init=/init", nil + } + + return "", nil +} + +// buildConsoleArgs constructs kernel arguments for console configuration. +func buildConsoleArgs(additional *shimsandbox.AdditionalConfig) []string { + var args []string + + // Serial console configuration + if additional.ConsolePipe != "" { + args = append(args, "8250_core.nr_uarts=1", "8250_core.skip_txen_test=1", "console=ttyS0,115200") + } else { + args = append(args, "8250_core.nr_uarts=0") + } + + // Graphics console configuration + if additional.EnableGraphicsConsole { + args = append(args, "console=tty") + } + + return args +} + +// buildInitArgs constructs the init arguments (passed after "--" in kernel command line). 
+func buildInitArgs( + ctx context.Context, + opts *runhcsoptions.Options, + writableOverlayDirs bool, + disableTimeSyncService bool, + processDumpLocation string, + rootFsFile string, + vmDebugging bool, +) string { + // Inject initial entropy over vsock during init launch + entropyArgs := fmt.Sprintf("-e %d", vmutils.LinuxEntropyVsockPort) + + // Build GCS execution command + gcsCmd := buildGCSCommand(opts, disableTimeSyncService, processDumpLocation) + + // Construct init arguments + var initArgsList []string + initArgsList = append(initArgsList, entropyArgs) + + // Handle writable overlay directories for VHD + if writableOverlayDirs { + if rootFsFile == vmutils.InitrdFile { + log.G(ctx).Warn("ignoring `WritableOverlayDirs` option since rootfs is already writable") + } else if rootFsFile == vmutils.VhdFile { + initArgsList = append(initArgsList, "-w") + } + } + + // Add GCS command execution + if vmDebugging { + // Launch a shell on the console for debugging + initArgsList = append(initArgsList, `sh -c "`+gcsCmd+` & exec sh"`) + } else { + initArgsList = append(initArgsList, gcsCmd) + } + + return strings.Join(initArgsList, " ") +} + +// buildGCSCommand constructs the GCS (Guest Compute Service) command line. 
+func buildGCSCommand( + opts *runhcsoptions.Options, + disableTimeSyncService bool, + processDumpLocation string, +) string { + // Start with vsockexec wrapper + var cmdParts []string + cmdParts = append(cmdParts, "/bin/vsockexec") + + // Add logging vsock port + cmdParts = append(cmdParts, fmt.Sprintf("-e %d", vmutils.LinuxLogVsockPort)) + + // Determine log level + logLevel := "info" + if opts != nil && opts.LogLevel != "" { + logLevel = opts.LogLevel + } + + // Build GCS base command + gcsParts := []string{ + "/bin/gcs", + "-v4", + "-log-format json", + "-loglevel " + logLevel, + } + + // Add optional GCS flags + if disableTimeSyncService { + gcsParts = append(gcsParts, "-disable-time-sync") + } + + if opts != nil && opts.ScrubLogs { + gcsParts = append(gcsParts, "-scrub-logs") + } + + if processDumpLocation != "" { + gcsParts = append(gcsParts, "-core-dump-location", processDumpLocation) + } + + // Combine vsockexec and GCS command + cmdParts = append(cmdParts, strings.Join(gcsParts, " ")) + + return strings.Join(cmdParts, " ") +} diff --git a/cmd/containerd-shim-lcow-v1/specs/sandbox_specs.go b/cmd/containerd-shim-lcow-v1/specs/sandbox_specs.go new file mode 100644 index 0000000000..67467f0b54 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/specs/sandbox_specs.go @@ -0,0 +1,517 @@ +package specs + +import ( + "context" + "fmt" + "path/filepath" + "strings" + + shimsandbox "github.com/Microsoft/hcsshim/api/sandbox/v1" + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + iannotations "github.com/Microsoft/hcsshim/internal/annotations" + "github.com/Microsoft/hcsshim/internal/gcs/prot" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/processorinfo" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + "github.com/Microsoft/hcsshim/osversion" + shimannotations 
"github.com/Microsoft/hcsshim/pkg/annotations" + + "github.com/Microsoft/go-winio" + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +// GenerateSandboxSpecs builds a shimsandbox.Spec from options, annotations, and device assignments. +// This Sandbox spec will be used by the CreateSandbox API to create a new sandbox. +func GenerateSandboxSpecs( + ctx context.Context, + opts *runhcsoptions.Options, + annotations map[string]string, + devices []specs.WindowsDevice, +) (*shimsandbox.Spec, error) { + + log.G(ctx).Info("GenerateSandboxSpecs: starting sandbox spec generation") + + if opts == nil { + return nil, fmt.Errorf("no options provided") + } + if annotations == nil { + annotations = map[string]string{} + } + + // Process annotations prior to parsing them into shimsandbox.Spec. + if err := processAnnotations(ctx, opts, annotations); err != nil { + return nil, fmt.Errorf("failed to process annotations: %w", err) + } + + // Validate sandbox platform and architecture. + platform := strings.ToLower(opts.SandboxPlatform) + log.G(ctx).WithField("platform", platform).Debug("validating sandbox platform") + if platform != "linux/amd64" && platform != "linux/arm64" { + return nil, fmt.Errorf("unsupported sandbox platform: %s", opts.SandboxPlatform) + } + + // Determine if this is a confidential VM early, as it affects boot options parsing + securityPolicy := oci.ParseAnnotationsString(annotations, shimannotations.LCOWSecurityPolicy, "") + noSecurityHardware := oci.ParseAnnotationsBool(ctx, annotations, shimannotations.NoSecurityHardware, false) + isConfidential := securityPolicy != "" && !noSecurityHardware + + log.G(ctx).WithFields(logrus.Fields{ + "isConfidential": isConfidential, + "hasSecurityPolicy": securityPolicy != "", + "noSecurityHardware": noSecurityHardware, + }).Debug("determined confidential VM mode") + + // Parse CPU configuration. 
+ cpuConfig, err := parseCPUOptions(ctx, opts, annotations, platform) + if err != nil { + return nil, fmt.Errorf("failed to parse CPU parameters: %w", err) + } + + // Parse memory configuration. + memoryConfig, err := parseMemoryOptions(ctx, opts, annotations) + if err != nil { + return nil, fmt.Errorf("failed to parse memory parameters: %w", err) + } + + // Parse storage configuration. + storageConfig, err := parseStorageOptions(ctx, annotations, isConfidential) + if err != nil { + return nil, fmt.Errorf("failed to parse storage parameters: %w", err) + } + + // Parse NUMA settings only for non-confidential VMs. + var numaConfig *shimsandbox.NUMAConfig + if !isConfidential { + numaConfig, err = parseNUMAOptions(ctx, annotations, memoryConfig, cpuConfig) + if err != nil { + return nil, fmt.Errorf("failed to parse NUMA parameters: %w", err) + } + } + + // Parse any guest specific options. + guestOptions, err := parseGuestOptions(ctx, annotations) + if err != nil { + return nil, fmt.Errorf("failed to parse guest options: %w", err) + } + + // Parse boot options. + // For confidential VMs, we don't use the standard boot options - the UEFI secure boot + // settings will be set by parseConfidentialOptions. + bootOptions := &shimsandbox.BootOptions{} + var rootFsFullPath string + if !isConfidential { + bootOptions, rootFsFullPath, err = parseBootOptions(ctx, opts, annotations) + if err != nil { + return nil, fmt.Errorf("failed to parse boot options: %w", err) + } + } + + // Parse device options. + // This should be done after parsing boot options, as some device options may depend on boot settings (e.g., rootfs path). + deviceOptions, err := parseDeviceOptions(ctx, annotations, devices, memoryConfig, rootFsFullPath, isConfidential, numaConfig) + if err != nil { + return nil, fmt.Errorf("failed to parse device options: %w", err) + } + + // Parse additional options and settings. 
+ additionalConfig, err := parseAdditionalOptions(ctx, annotations, isConfidential) + if err != nil { + return nil, fmt.Errorf("failed to parse additional parameters: %w", err) + } + + // For confidential VMs, parse confidential options which includes secure boot settings. + var confidentialOptions *shimsandbox.ConfidentialOptions + if isConfidential { + confidentialOptions, err = parseConfidentialOptions(ctx, opts, annotations, bootOptions, memoryConfig) + if err != nil { + return nil, fmt.Errorf("failed to parse confidential options: %w", err) + } + } + + // Build the kernel command line after all options are parsed. + // For confidential VMs (SNP mode), kernel args are embedded in VMGS file, so skip this. + var kernelArgs string + if !isConfidential { + kernelArgs, err = buildKernelArgs(ctx, opts, annotations, bootOptions, filepath.Base(rootFsFullPath), cpuConfig, deviceOptions, additionalConfig) + if err != nil { + return nil, fmt.Errorf("failed to build kernel args: %w", err) + } + + // Other boot options were already added earlier in parseBootOptions. + // Set the kernel args here which are constructed based on all other options. + if bootOptions.LinuxKernelDirect != nil { + bootOptions.LinuxKernelDirect.KernelCmdLine = kernelArgs + } else if bootOptions.Uefi != nil && bootOptions.Uefi.BootThis != nil { + bootOptions.Uefi.BootThis.OptionalData = kernelArgs + } + log.G(ctx).WithField("kernelArgs", kernelArgs).Debug("kernel arguments configured") + } + + // Finally, build the shimsandbox.Spec with all the parsed and processed options. 
+ log.G(ctx).Debug("assembling final sandbox spec") + spec := &shimsandbox.Spec{ + HcsSchemaVersion: getHCSSchemaVersion(isConfidential), + CpuConfig: cpuConfig, + MemoryConfig: memoryConfig, + StorageConfig: storageConfig, + NumaConfig: numaConfig, + BootOptions: bootOptions, + GuestOptions: guestOptions, + DeviceOptions: deviceOptions, + ConfidentialOptions: confidentialOptions, + AdditionalConfig: additionalConfig, + } + + // Resource Partition ID + resourcePartitionID := oci.ParseAnnotationsString(annotations, shimannotations.ResourcePartitionID, "") + if resourcePartitionID != "" { + log.G(ctx).WithField("resourcePartitionID", resourcePartitionID).Debug("setting resource partition ID") + spec.ResourcePartitionID = &resourcePartitionID + } + + // CPU group and resource partition are mutually exclusive. + if spec.CpuConfig.CpuGroupID != "" && spec.ResourcePartitionID != nil { + return nil, fmt.Errorf("cpu_group_id and resource_partition_id cannot be set at the same time") + } + + if spec.ResourcePartitionID != nil { + if _, err := guid.FromString(*spec.ResourcePartitionID); err != nil { + return nil, fmt.Errorf("failed to parse resource_partition_id %q to GUID: %w", *spec.ResourcePartitionID, err) + } + } + + log.G(ctx).WithFields(logrus.Fields{ + "sandbox-spec": spec, + }).Info("sandbox spec generation completed successfully") + + return spec, nil +} + +// processAnnotations applies defaults and normalizes annotation values. +func processAnnotations(ctx context.Context, opts *runhcsoptions.Options, annotations map[string]string) error { + log.G(ctx).WithField("annotations", annotations).Debug("processing annotations") + + // Apply default annotations. 
+ for key, value := range opts.DefaultContainerAnnotations { + // Only set default if not already set in annotations + if _, exists := annotations[key]; !exists { + annotations[key] = value + } + } + + err := oci.ProcessAnnotations(ctx, annotations) + if err != nil { + return fmt.Errorf("failed to process OCI annotations: %w", err) + } + + // Check for explicitly unsupported annotations. + ncProxy := oci.ParseAnnotationsString(annotations, shimannotations.NetworkConfigProxy, "") + if ncProxy != "" { + return fmt.Errorf("%s annotation is not supported", shimannotations.NetworkConfigProxy) + } + + log.G(ctx).Debug("annotations processed successfully") + return nil +} + +// getHCSSchemaVersion returns the appropriate HCS schema version. +// Confidential containers require schema v2.5, while regular ones use schema v2.1. +func getHCSSchemaVersion(isConfidential bool) *shimsandbox.HCSSchemaVersion { + if isConfidential { + return &shimsandbox.HCSSchemaVersion{ + Major: 2, + Minor: 5, + } + } + return &shimsandbox.HCSSchemaVersion{ + Major: 2, + Minor: 1, + } +} + +// parseCPUOptions parses CPU options from annotations and options. +func parseCPUOptions(ctx context.Context, opts *runhcsoptions.Options, annotations map[string]string, platform string) (*shimsandbox.CPUConfig, error) { + log.G(ctx).Debug("parsing CPU configuration") + cpu := &shimsandbox.CPUConfig{} + + cpu.ProcessorCount = oci.ParseAnnotationsInt32(ctx, annotations, shimannotations.ProcessorCount, opts.VmProcessorCount) + if cpu.ProcessorCount <= 0 { + cpu.ProcessorCount = vmutils.DefaultProcessorCountForUVM() + } + + processorTopology, err := processorinfo.HostProcessorInfo(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get host processor information: %w", err) + } + // To maintain compatibility with Docker and older shim we need to automatically downgrade + // a user CPU count if the setting is not possible. 
+ cpu.ProcessorCount = vmutils.NormalizeProcessorCount(ctx, "", cpu.ProcessorCount, processorTopology) + + cpu.ProcessorLimit = oci.ParseAnnotationsInt32(ctx, annotations, shimannotations.ProcessorLimit, 0) + cpu.ProcessorWeight = oci.ParseAnnotationsInt32(ctx, annotations, shimannotations.ProcessorWeight, 0) + + // Extract architecture from platform string (e.g., "linux/amd64" -> "amd64") + if idx := strings.IndexByte(platform, '/'); idx != -1 && idx < len(platform)-1 { + cpu.Architecture = platform[idx+1:] + } + + // CPU group configuration + cpu.CpuGroupID = oci.ParseAnnotationsString(annotations, shimannotations.CPUGroupID, "") + if cpu.CpuGroupID != "" && osversion.Build() < osversion.V21H1 { + return nil, vmutils.ErrCPUGroupCreateNotSupported + } + + log.G(ctx).WithFields(logrus.Fields{ + "processorCount": cpu.ProcessorCount, + "processorLimit": cpu.ProcessorLimit, + "processorWeight": cpu.ProcessorWeight, + "architecture": cpu.Architecture, + "cpuGroupID": cpu.CpuGroupID, + }).Debug("parsed CPU configuration") + + return cpu, nil +} + +// parseMemoryOptions parses memory options from annotations and options. +func parseMemoryOptions(ctx context.Context, opts *runhcsoptions.Options, annotations map[string]string) (*shimsandbox.MemoryConfig, error) { + log.G(ctx).Debug("parsing memory configuration") + mem := &shimsandbox.MemoryConfig{} + + mem.MemorySizeInMb = oci.ParseAnnotationsUint64(ctx, annotations, shimannotations.MemorySizeInMB, uint64(opts.VmMemorySizeInMb)) + if mem.MemorySizeInMb <= 0 { + mem.MemorySizeInMb = 1024 + } + // Normalize memory size to be a multiple of 256MB, as required by Hyper-V. 
+ mem.MemorySizeInMb = vmutils.NormalizeMemorySize(ctx, "", mem.MemorySizeInMb) + + mem.LowMmioGapInMb = oci.ParseAnnotationsUint64(ctx, annotations, shimannotations.MemoryLowMMIOGapInMB, 0) + mem.HighMmioBaseInMb = oci.ParseAnnotationsUint64(ctx, annotations, shimannotations.MemoryHighMMIOBaseInMB, 0) + mem.HighMmioGapInMb = oci.ParseAnnotationsUint64(ctx, annotations, shimannotations.MemoryHighMMIOGapInMB, 0) + + mem.AllowOvercommit = oci.ParseAnnotationsBool(ctx, annotations, shimannotations.AllowOvercommit, true) + mem.EnableDeferredCommit = oci.ParseAnnotationsBool(ctx, annotations, shimannotations.EnableDeferredCommit, false) + mem.FullyPhysicallyBacked = oci.ParseAnnotationsBool(ctx, annotations, shimannotations.FullyPhysicallyBacked, false) + + if mem.FullyPhysicallyBacked { + mem.AllowOvercommit = false + } + + mem.EnableColdDiscardHint = oci.ParseAnnotationsBool(ctx, annotations, shimannotations.EnableColdDiscardHint, false) + if mem.EnableColdDiscardHint && osversion.Build() < 18967 { + return nil, fmt.Errorf("EnableColdDiscardHint is not supported on builds older than 18967") + } + + if mem.EnableDeferredCommit && !mem.AllowOvercommit { + return nil, fmt.Errorf("enable_deferred_commit is not supported on physically backed vms") + } + + log.G(ctx).WithFields(logrus.Fields{ + "memorySizeMB": mem.MemorySizeInMb, + "allowOvercommit": mem.AllowOvercommit, + "fullyPhysicalBacked": mem.FullyPhysicallyBacked, + "enableDeferredCommit": mem.EnableDeferredCommit, + "enableColdDiscard": mem.EnableColdDiscardHint, + }).Debug("parsed memory configuration") + + return mem, nil +} + +// parseStorageOptions parses storage options from annotations. +// isConfidential affects the default value for enable_scratch_encryption. 
+func parseStorageOptions(ctx context.Context, annotations map[string]string, isConfidential bool) (*shimsandbox.StorageConfig, error) { + log.G(ctx).Debug("parsing storage configuration") + + // Default for enable_scratch_encryption is false for non-confidential VMs, + // true for confidential VMs. Can be overridden by annotation. + enableScratchEncryption := oci.ParseAnnotationsBool(ctx, annotations, shimannotations.LCOWEncryptedScratchDisk, isConfidential) + + storageConfig := &shimsandbox.StorageConfig{ + NoWritableFileShares: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.DisableWritableFileShares, false), + StorageQosBandwidthMaximum: oci.ParseAnnotationsInt32(ctx, annotations, shimannotations.StorageQoSBandwidthMaximum, 0), + StorageQosIopsMaximum: oci.ParseAnnotationsInt32(ctx, annotations, shimannotations.StorageQoSIopsMaximum, 0), + EnableScratchEncryption: enableScratchEncryption, + } + + log.G(ctx).WithFields(logrus.Fields{ + "enableScratchEncryption": storageConfig.EnableScratchEncryption, + "qosBandwidthMax": storageConfig.StorageQosBandwidthMaximum, + "qosIopsMax": storageConfig.StorageQosIopsMaximum, + }).Debug("parsed storage configuration") + + return storageConfig, nil +} + +// parseNUMAOptions parses NUMA options from annotations and uses vmutils to +// prepare the vNUMA topology, then converts the result to proto structures. 
+func parseNUMAOptions(ctx context.Context, annotations map[string]string, mem *shimsandbox.MemoryConfig, cpu *shimsandbox.CPUConfig) (*shimsandbox.NUMAConfig, error) { + log.G(ctx).Debug("parsing NUMA configuration") + + // Build vmutils.NumaConfig from annotations + numaOpts := &vmutils.NumaConfig{ + MaxProcessorsPerNumaNode: oci.ParseAnnotationsUint32(ctx, annotations, shimannotations.NumaMaximumProcessorsPerNode, 0), + MaxMemorySizePerNumaNode: oci.ParseAnnotationsUint64(ctx, annotations, shimannotations.NumaMaximumMemorySizePerNode, 0), + PreferredPhysicalNumaNodes: oci.ParseAnnotationCommaSeparatedUint32(ctx, annotations, shimannotations.NumaPreferredPhysicalNodes, []uint32{}), + NumaMappedPhysicalNodes: oci.ParseAnnotationCommaSeparatedUint32(ctx, annotations, shimannotations.NumaMappedPhysicalNodes, []uint32{}), + NumaProcessorCounts: oci.ParseAnnotationCommaSeparatedUint32(ctx, annotations, shimannotations.NumaCountOfProcessors, []uint32{}), + NumaMemoryBlocksCounts: oci.ParseAnnotationCommaSeparatedUint64(ctx, annotations, shimannotations.NumaCountOfMemoryBlocks, []uint64{}), + } + + // Use vmutils to prepare the vNUMA topology. 
+ hcsNuma, hcsNumaProcessors, err := vmutils.PrepareVNumaTopology(ctx, numaOpts) + if err != nil { + return nil, fmt.Errorf("failed to prepare vNUMA topology: %w", err) + } + + if hcsNuma != nil { + log.G(ctx).WithField("virtualNodeCount", hcsNuma.VirtualNodeCount).Debug("vNUMA topology configured") + if mem.AllowOvercommit { + return nil, fmt.Errorf("vNUMA supports only Physical memory backing type") + } + if err := vmutils.ValidateNumaForVM(hcsNuma, uint32(cpu.ProcessorCount), mem.MemorySizeInMb); err != nil { + return nil, fmt.Errorf("failed to validate vNUMA settings: %w", err) + } + } + + log.G(ctx).WithFields(logrus.Fields{ + "numa": hcsNuma, + "numaProcessors": hcsNumaProcessors, + }).Debug("parsed NUMA configuration") + + // Convert hcsschema types to proto types + return convertHCSNumaToProto(hcsNuma, hcsNumaProcessors), nil +} + +// convertHCSNumaToProto converts hcsschema.Numa and hcsschema.NumaProcessors to proto types. +func convertHCSNumaToProto(hcsNuma *hcsschema.Numa, hcsNumaProcessors *hcsschema.NumaProcessors) *shimsandbox.NUMAConfig { + numaConfig := &shimsandbox.NUMAConfig{} + + if hcsNuma != nil { + numa := &shimsandbox.Numa{ + VirtualNodeCount: uint32(hcsNuma.VirtualNodeCount), + PreferredPhysicalNodes: hcsNuma.PreferredPhysicalNodes, + MaxSizePerNode: hcsNuma.MaxSizePerNode, + } + + // Convert settings if present (explicit topology) + for _, setting := range hcsNuma.Settings { + numa.Settings = append(numa.Settings, &shimsandbox.NumaSetting{ + VirtualNodeNumber: setting.VirtualNodeNumber, + PhysicalNodeNumber: setting.PhysicalNodeNumber, + VirtualSocketNumber: setting.VirtualSocketNumber, + CountOfProcessors: setting.CountOfProcessors, + CountOfMemoryBlocks: setting.CountOfMemoryBlocks, + MemoryBackingType: string(setting.MemoryBackingType), + }) + } + + numaConfig.Numa = numa + } + + if hcsNumaProcessors != nil { + numaConfig.NumaProcessors = &shimsandbox.NumaProcessors{ + CountPerNodeMax: hcsNumaProcessors.CountPerNode.Max, + NodePerSocket: 
hcsNumaProcessors.NodePerSocket, + } + } + + return numaConfig +} + +// parseAdditionalOptions parses additional options from annotations. +// For confidential VMs, it also configures the HvSocket service table with required VSock ports +// and skips consolePipe configuration. +func parseAdditionalOptions(ctx context.Context, annotations map[string]string, isConfidential bool) (*shimsandbox.AdditionalConfig, error) { + log.G(ctx).Debug("parsing additional configuration") + + hvSocketServiceTable, err := parseHVSocketServiceTableFromAnnotations(ctx, annotations) + if err != nil { + return nil, fmt.Errorf("failed to parse HVSocket service table: %w", err) + } + + // For confidential VMs, consolePipe is not supported. + var consolePipe string + if !isConfidential { + consolePipe = oci.ParseAnnotationsString(annotations, iannotations.UVMConsolePipe, "") + if consolePipe != "" && !strings.HasPrefix(consolePipe, `\\.\pipe\`) { + return nil, fmt.Errorf("listener for serial console is not a named pipe") + } + } + + additional := &shimsandbox.AdditionalConfig{ + // EnableGraphicsConsole is always explicitly false for now. + // If needed, it can be supported by annotations later on. + EnableGraphicsConsole: false, + ConsolePipe: consolePipe, + AdditionalHypervConfig: hvSocketServiceTable, + } + + // For confidential VMs, configure HvSocket service table with required VSock ports. 
+ if isConfidential { + log.G(ctx).Debug("configuring HvSocket service table for confidential VM") + if additional.AdditionalHypervConfig == nil { + additional.AdditionalHypervConfig = make(map[string]*shimsandbox.HvSocketServiceConfig) + } + + // Set permissions for the VSock ports: + // entropyVsockPort - 1 is the entropy port + // linuxLogVsockPort - 109 used by vsockexec to log stdout/stderr logging + // LinuxGcsVsockPort (0x40000000) is the GCS port + // LinuxGcsVsockPort + 1 is the bridge (see guestconnection.go) + hvSockets := []uint32{vmutils.LinuxEntropyVsockPort, vmutils.LinuxLogVsockPort, prot.LinuxGcsVsockPort, prot.LinuxGcsVsockPort + 1} + + // Parse and append extra VSock ports from annotations + extraVsockPorts := oci.ParseAnnotationCommaSeparatedUint32(ctx, annotations, iannotations.ExtraVSockPorts, []uint32{}) + hvSockets = append(hvSockets, extraVsockPorts...) + + log.G(ctx).WithFields(logrus.Fields{ + "vsockPorts": hvSockets, + "extraPort": extraVsockPorts, + }).Debug("configured VSock ports for confidential VM") + + for _, port := range hvSockets { + key := winio.VsockServiceID(port).String() + additional.AdditionalHypervConfig[key] = &shimsandbox.HvSocketServiceConfig{ + AllowWildcardBinds: true, + BindSecurityDescriptor: "D:P(A;;FA;;;WD)", + ConnectSecurityDescriptor: "D:P(A;;FA;;;SY)(A;;FA;;;BA)", + } + } + } + + log.G(ctx).WithField("additionalConfig", additional).Debug("parsed additional configuration") + + return additional, nil +} + +// parseHVSocketServiceTableFromAnnotations parses HVSocket service table from annotations. 
+func parseHVSocketServiceTableFromAnnotations(ctx context.Context, annotations map[string]string) (map[string]*shimsandbox.HvSocketServiceConfig, error) { + hcsHvSocketServiceTable := oci.ParseHVSocketServiceTable(ctx, annotations) + if len(hcsHvSocketServiceTable) == 0 { + return nil, nil + } + + sc := make(map[string]*shimsandbox.HvSocketServiceConfig, len(hcsHvSocketServiceTable)) + for name, entry := range hcsHvSocketServiceTable { + sc[name] = &shimsandbox.HvSocketServiceConfig{ + BindSecurityDescriptor: entry.BindSecurityDescriptor, + ConnectSecurityDescriptor: entry.ConnectSecurityDescriptor, + AllowWildcardBinds: entry.AllowWildcardBinds, + Disabled: entry.Disabled, + } + } + + return sc, nil +} + +// parseGuestOptions parses guest options from annotations. +func parseGuestOptions(ctx context.Context, annotations map[string]string) (*shimsandbox.GuestOptions, error) { + log.G(ctx).Debug("parsing guest options") + + return &shimsandbox.GuestOptions{ + PolicyBasedRouting: oci.ParseAnnotationsBool(ctx, annotations, iannotations.NetworkingPolicyBasedRouting, false), + }, nil +} diff --git a/cmd/containerd-shim-lcow-v1/specs/sandbox_specs_test.go b/cmd/containerd-shim-lcow-v1/specs/sandbox_specs_test.go new file mode 100644 index 0000000000..8fa3ee8411 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/specs/sandbox_specs_test.go @@ -0,0 +1,1722 @@ +package specs + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + shimsandbox "github.com/Microsoft/hcsshim/api/sandbox/v1" + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + iannotations "github.com/Microsoft/hcsshim/internal/annotations" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + shimannotations "github.com/Microsoft/hcsshim/pkg/annotations" + + "github.com/opencontainers/runtime-spec/specs-go" +) + +type specTestCase struct { + name string + opts *runhcsoptions.Options + annotations map[string]string + devices []specs.WindowsDevice + wantErr 
bool + errContains string + validate func(t *testing.T, spec *shimsandbox.Spec) +} + +func runTestCases(t *testing.T, ctx context.Context, defaultOpts *runhcsoptions.Options, cases []specTestCase) { + t.Helper() + + for _, tt := range cases { + tt := tt + t.Run(tt.name, func(t *testing.T) { + opts := tt.opts + if opts == nil { + opts = defaultOpts + } + + devices := tt.devices + if devices == nil { + devices = []specs.WindowsDevice{} + } + + spec, err := GenerateSandboxSpecs(ctx, opts, tt.annotations, devices) + + if tt.wantErr { + if err == nil { + t.Errorf("expected error containing %q, got nil", tt.errContains) + return + } + if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) { + t.Errorf("expected error containing %q, got %q", tt.errContains, err.Error()) + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if tt.validate != nil { + tt.validate(t, spec) + } + }) + } +} + +// Helper function to extract kernel args from spec for validation +func getKernelArgs(spec *shimsandbox.Spec) string { + if spec.BootOptions.LinuxKernelDirect != nil { + return spec.BootOptions.LinuxKernelDirect.KernelCmdLine + } + if spec.BootOptions.Uefi != nil && spec.BootOptions.Uefi.BootThis != nil { + return spec.BootOptions.Uefi.BootThis.OptionalData + } + return "" +} + +func TestGenerateSandboxSpecs(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + + tests := []specTestCase{ + { + name: "nil options should return error", + opts: nil, + wantErr: true, + errContains: "no options provided", + }, + { + name: "unsupported platform should return error", + opts: &runhcsoptions.Options{ + SandboxPlatform: "windows/amd64", + }, + wantErr: true, + errContains: "unsupported sandbox platform", + }, + { + name: "minimal valid config for linux/amd64", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec 
*shimsandbox.Spec) { + if spec == nil { + t.Fatal("spec should not be nil") + } + if spec.CpuConfig == nil { + t.Fatal("cpu config should not be nil") + } + if spec.CpuConfig.Architecture != "amd64" { + t.Errorf("expected architecture amd64, got %v", spec.CpuConfig.Architecture) + } + if spec.MemoryConfig == nil { + t.Fatal("memory config should not be nil") + } + if spec.MemoryConfig.MemorySizeInMb != 1024 { + t.Errorf("expected default memory 1024MB, got %v", spec.MemoryConfig.MemorySizeInMb) + } + }, + }, + { + name: "minimal valid config for linux/arm64", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/arm64", + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.Architecture != "arm64" { + t.Errorf("expected architecture arm64, got %v", spec.CpuConfig.Architecture) + } + }, + }, + { + name: "platform case insensitive", + opts: &runhcsoptions.Options{ + SandboxPlatform: "Linux/AMD64", + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.Architecture != "amd64" { + t.Errorf("expected architecture amd64, got %v", spec.CpuConfig.Architecture) + } + }, + }, + { + name: "CPU configuration from options", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + VmProcessorCount: 4, + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.ProcessorCount != 4 { + t.Errorf("expected processor count 4, got %v", spec.CpuConfig.ProcessorCount) + } + }, + }, + { + name: "CPU configuration from annotations", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.ProcessorCount: "8", + shimannotations.ProcessorLimit: "50000", + shimannotations.ProcessorWeight: "500", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if 
spec.CpuConfig.ProcessorCount != 8 { + t.Errorf("expected processor count 8, got %v", spec.CpuConfig.ProcessorCount) + } + if spec.CpuConfig.ProcessorLimit != 50000 { + t.Errorf("expected processor limit 50000, got %v", spec.CpuConfig.ProcessorLimit) + } + if spec.CpuConfig.ProcessorWeight != 500 { + t.Errorf("expected processor weight 500, got %v", spec.CpuConfig.ProcessorWeight) + } + }, + }, + { + name: "memory configuration from options", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + VmMemorySizeInMb: 2048, + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.MemoryConfig.MemorySizeInMb != 2048 { + t.Errorf("expected memory size 2048MB, got %v", spec.MemoryConfig.MemorySizeInMb) + } + }, + }, + { + name: "memory configuration from annotations", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.MemorySizeInMB: "4096", + shimannotations.AllowOvercommit: "false", + shimannotations.EnableDeferredCommit: "false", + shimannotations.FullyPhysicallyBacked: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.MemoryConfig.MemorySizeInMb != 4096 { + t.Errorf("expected memory size 4096MB, got %v", spec.MemoryConfig.MemorySizeInMb) + } + if spec.MemoryConfig.AllowOvercommit != false { + t.Errorf("expected allow overcommit false, got %v", spec.MemoryConfig.AllowOvercommit) + } + if spec.MemoryConfig.FullyPhysicallyBacked != true { + t.Errorf("expected fully physically backed true, got %v", spec.MemoryConfig.FullyPhysicallyBacked) + } + }, + }, + { + name: "memory MMIO configuration", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.MemoryLowMMIOGapInMB: "256", + shimannotations.MemoryHighMMIOBaseInMB: "1024", + 
shimannotations.MemoryHighMMIOGapInMB: "512", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.MemoryConfig.LowMmioGapInMb != 256 { + t.Errorf("expected low MMIO gap 256MB, got %v", spec.MemoryConfig.LowMmioGapInMb) + } + if spec.MemoryConfig.HighMmioBaseInMb != 1024 { + t.Errorf("expected high MMIO base 1024MB, got %v", spec.MemoryConfig.HighMmioBaseInMb) + } + if spec.MemoryConfig.HighMmioGapInMb != 512 { + t.Errorf("expected high MMIO gap 512MB, got %v", spec.MemoryConfig.HighMmioGapInMb) + } + }, + }, + { + name: "storage QoS configuration", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.StorageQoSBandwidthMaximum: "1000000", + shimannotations.StorageQoSIopsMaximum: "5000", + shimannotations.DisableWritableFileShares: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.StorageConfig.StorageQosBandwidthMaximum != 1000000 { + t.Errorf("expected storage bandwidth 1000000, got %v", spec.StorageConfig.StorageQosBandwidthMaximum) + } + if spec.StorageConfig.StorageQosIopsMaximum != 5000 { + t.Errorf("expected storage IOPS 5000, got %v", spec.StorageConfig.StorageQosIopsMaximum) + } + if spec.StorageConfig.NoWritableFileShares != true { + t.Errorf("expected no writable file shares true, got %v", spec.StorageConfig.NoWritableFileShares) + } + }, + }, + { + name: "NUMA configuration implicit", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.NumaMaximumProcessorsPerNode: "4", + shimannotations.NumaMaximumMemorySizePerNode: "2048", + shimannotations.NumaPreferredPhysicalNodes: "0,1,2", + shimannotations.FullyPhysicallyBacked: "true", // Required for NUMA + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.NumaConfig.NumaProcessors == nil { + t.Fatal("expected NumaProcessors 
to be set for implicit topology") + } + if spec.NumaConfig.NumaProcessors.CountPerNodeMax != 4 { + t.Errorf("expected max processors per NUMA node 4, got %v", spec.NumaConfig.NumaProcessors.CountPerNodeMax) + } + if spec.NumaConfig.Numa == nil { + t.Fatal("expected Numa to be set for implicit topology") + } + if spec.NumaConfig.Numa.MaxSizePerNode != 2048 { + t.Errorf("expected max memory per NUMA node 2048MB, got %v", spec.NumaConfig.Numa.MaxSizePerNode) + } + if len(spec.NumaConfig.Numa.PreferredPhysicalNodes) != 3 { + t.Errorf("expected 3 preferred physical NUMA nodes, got %d", len(spec.NumaConfig.Numa.PreferredPhysicalNodes)) + } + }, + }, + { + name: "NUMA configuration explicit", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + VmProcessorCount: 4, // Must match total in NumaCountOfProcessors (2+2) + VmMemorySizeInMb: int32(2048), // Must match total in NumaCountOfMemoryBlocks (1024+1024) + }, + annotations: map[string]string{ + shimannotations.NumaMappedPhysicalNodes: "0,1", + shimannotations.NumaCountOfProcessors: "2,2", + shimannotations.NumaCountOfMemoryBlocks: "1024,1024", + shimannotations.FullyPhysicallyBacked: "true", // NUMA requires physical memory backing + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.NumaConfig.Numa == nil { + t.Fatal("expected Numa to be set for explicit topology") + } + if spec.NumaConfig.Numa.VirtualNodeCount != 2 { + t.Errorf("expected 2 virtual nodes, got %d", spec.NumaConfig.Numa.VirtualNodeCount) + } + if len(spec.NumaConfig.Numa.Settings) != 2 { + t.Errorf("expected 2 NUMA settings, got %d", len(spec.NumaConfig.Numa.Settings)) + } + // Verify first NUMA setting + if spec.NumaConfig.Numa.Settings[0].PhysicalNodeNumber != 0 { + t.Errorf("expected physical node 0, got %d", spec.NumaConfig.Numa.Settings[0].PhysicalNodeNumber) + } + if spec.NumaConfig.Numa.Settings[0].CountOfProcessors != 2 { + t.Errorf("expected 2 processors, got %d", 
spec.NumaConfig.Numa.Settings[0].CountOfProcessors) + } + if spec.NumaConfig.Numa.Settings[0].CountOfMemoryBlocks != 1024 { + t.Errorf("expected 1024 memory blocks, got %d", spec.NumaConfig.Numa.Settings[0].CountOfMemoryBlocks) + } + if spec.NumaConfig.Numa.Settings[0].MemoryBackingType != "Physical" { + t.Errorf("expected Physical backing type, got %s", spec.NumaConfig.Numa.Settings[0].MemoryBackingType) + } + }, + }, + { + name: "NUMA configuration explicit mismatch error", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.NumaMappedPhysicalNodes: "0,1", + shimannotations.NumaCountOfProcessors: "2", // Only 1 value instead of 2 + shimannotations.NumaCountOfMemoryBlocks: "1024,1024", + }, + wantErr: true, + errContains: "mismatch in number of physical numa nodes", + }, + { + name: "boot options with kernel direct", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.KernelDirectBoot: "true", + shimannotations.KernelBootOptions: "console=ttyS0", + shimannotations.PreferredRootFSType: "vhd", + shimannotations.EnableColdDiscardHint: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.BootOptions.LinuxKernelDirect == nil { + t.Error("expected kernel direct boot (LinuxKernelDirect to be set)") + } + // Check that kernel boot options are included in the command line + if !strings.Contains(getKernelArgs(spec), "console=ttyS0") { + t.Errorf("expected kernel cmd line to contain 'console=ttyS0', got %v", getKernelArgs(spec)) + } + if spec.MemoryConfig.EnableColdDiscardHint != true { + t.Errorf("expected cold discard hint true, got %v", spec.MemoryConfig.EnableColdDiscardHint) + } + }, + }, + { + name: "boot options with initrd preferred", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: 
validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.PreferredRootFSType: "initrd", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // When initrd is preferred, LinuxKernelDirect should have InitRdPath set + if spec.BootOptions.LinuxKernelDirect != nil && spec.BootOptions.LinuxKernelDirect.InitRdPath == "" { + t.Error("expected InitRdPath to be set for initrd boot") + } + }, + }, + { + name: "invalid preferred rootfs type", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.PreferredRootFSType: "invalid", + }, + wantErr: true, + errContains: "invalid PreferredRootFSType", + }, + { + name: "boot files path not found", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: "/nonexistent/path", + }, + wantErr: true, + errContains: "boot_files_root_path", + }, + { + name: "guest options", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + iannotations.NetworkingPolicyBasedRouting: "true", + iannotations.ExtraVSockPorts: "8000,8001,8002", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.GuestOptions.PolicyBasedRouting != true { + t.Errorf("expected policy based routing true, got %v", spec.GuestOptions.PolicyBasedRouting) + } + // Extra vsock ports annotation is parsed but in non-confidential mode, + // they are not automatically added to AdditionalHypervConfig. + // The ExtraVSockPorts annotation is only used in confidential VMs to add + // ports to the HvSocket service table. In non-confidential mode, users would + // need to use the HvSocket service table annotations directly. + // So we just validate that the GuestOptions was populated correctly. 
+ if spec.GuestOptions == nil { + t.Error("expected GuestOptions to be configured") + } + }, + }, + { + name: "device options with VPMem", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.VPMemCount: "32", + shimannotations.VPMemSize: "8589934592", + shimannotations.VPMemNoMultiMapping: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.DeviceOptions.VpMemController == nil { + t.Errorf("expected VpMemController to be configured") + return + } + if spec.DeviceOptions.VpMemController.MaximumCount != 32 { + t.Errorf("expected VPMem count 32, got %v", spec.DeviceOptions.VpMemController.MaximumCount) + } + if spec.DeviceOptions.VpMemController.MaximumSizeBytes != 8589934592 { + t.Errorf("expected VPMem size 8589934592, got %v", spec.DeviceOptions.VpMemController.MaximumSizeBytes) + } + if spec.DeviceOptions.VpMemController.MultiMapping != false { + t.Errorf("expected VPMem multi mapping false (no multi mapping true), got %v", spec.DeviceOptions.VpMemController.MultiMapping) + } + }, + }, + { + name: "VPMem count exceeds maximum", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.VPMemCount: "200", + }, + wantErr: true, + errContains: "vp_mem_device_count cannot be greater than", + }, + { + name: "VPMem size not aligned to 4096", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.VPMemSize: "12345", + }, + wantErr: true, + errContains: "vp_mem_size_bytes must be a multiple of 4096", + }, + { + name: "fully physically backed disables VPMem", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + 
shimannotations.FullyPhysicallyBacked: "true", + shimannotations.VPMemCount: "64", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.DeviceOptions.VpMemController != nil && spec.DeviceOptions.VpMemController.MaximumCount != 0 { + t.Errorf("expected VPMem count 0 when fully physically backed, got %v", spec.DeviceOptions.VpMemController.MaximumCount) + } + }, + }, + { + name: "assigned devices - VPCI", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + devices: []specs.WindowsDevice{ + { + ID: "PCIP\\VEN_1234&DEV_5678", + IDType: "vpci-instance-id", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if len(spec.DeviceOptions.VpciDevices) != 1 { + t.Errorf("expected 1 assigned device, got %d", len(spec.DeviceOptions.VpciDevices)) + } + if spec.DeviceOptions.VpciDevices[0].DeviceInstanceID != "PCIP\\VEN_1234&DEV_5678" { + t.Errorf("unexpected device instance ID: %s", spec.DeviceOptions.VpciDevices[0].DeviceInstanceID) + } + }, + }, + { + name: "assigned devices - GPU", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + devices: []specs.WindowsDevice{ + { + ID: "GPU-12345678-1234-5678-1234-567812345678", + IDType: "gpu", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if len(spec.DeviceOptions.VpciDevices) != 1 { + t.Errorf("expected 1 assigned device, got %d", len(spec.DeviceOptions.VpciDevices)) + } + }, + }, + { + name: "assigned devices with virtual function index", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + devices: []specs.WindowsDevice{ + { + ID: "PCIP\\VEN_1234&DEV_5678/2", + IDType: "vpci-instance-id", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if len(spec.DeviceOptions.VpciDevices) != 1 { + t.Errorf("expected 1 assigned device, got %d", len(spec.DeviceOptions.VpciDevices)) + } + if 
spec.DeviceOptions.VpciDevices[0].VirtualFunctionIndex != 2 { + t.Errorf("expected virtual function index 2, got %d", spec.DeviceOptions.VpciDevices[0].VirtualFunctionIndex) + } + }, + }, + { + name: "confidential options with security policy (no hardware bypass)", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", // valid base64: {"test":"test"} + shimannotations.LCOWSecurityPolicyEnforcer: "rego", + shimannotations.LCOWEncryptedScratchDisk: "true", + // Note: NoSecurityHardware NOT set, so it defaults to false, meaning SNP mode + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.ConfidentialOptions == nil { + t.Fatal("expected ConfidentialOptions to be set") + } + if spec.ConfidentialOptions.SecurityPolicy != "eyJ0ZXN0IjoidGVzdCJ9" { + t.Errorf("expected security policy, got %v", spec.ConfidentialOptions.SecurityPolicy) + } + if spec.ConfidentialOptions.SecurityPolicyEnforcer != "rego" { + t.Errorf("expected security policy enforcer 'rego', got %v", spec.ConfidentialOptions.SecurityPolicyEnforcer) + } + if spec.StorageConfig.EnableScratchEncryption != true { + t.Errorf("expected scratch encryption true, got %v", spec.StorageConfig.EnableScratchEncryption) + } + // VMGS template path should be set in confidential mode + if spec.ConfidentialOptions.VmgsTemplatePath == "" { + t.Error("expected VMGS template path to be set in confidential mode") + } + // DM-Verity rootfs template path should be set in confidential mode + if spec.ConfidentialOptions.DmVerityRootfsTemplatePath == "" { + t.Error("expected DM-Verity rootfs template path to be set in confidential mode") + } + }, + }, + { + name: "confidential SNP mode configuration", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + 
shimannotations.LCOWSecurityPolicy: "eyJzbmAiOiJ0ZXN0In0=", // valid base64: {"snp":"test"} + shimannotations.NoSecurityHardware: "false", + shimannotations.VPMemCount: "64", + shimannotations.DmVerityCreateArgs: "test-verity-args", // Required for SNP mode VHD boot + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // In SNP mode, VPMem should be disabled + if spec.DeviceOptions.VpMemController != nil && spec.DeviceOptions.VpMemController.MaximumCount != 0 { + t.Errorf("expected VPMem count 0 in SNP mode, got %v", spec.DeviceOptions.VpMemController.MaximumCount) + } + // Memory should not allow overcommit + if spec.MemoryConfig.AllowOvercommit != false { + t.Errorf("expected allow overcommit false in SNP mode, got %v", spec.MemoryConfig.AllowOvercommit) + } + // VMGS template path should be set + if spec.ConfidentialOptions.VmgsTemplatePath == "" { + t.Error("expected VMGS template path to be set in SNP mode") + } + // DM-Verity root fs VHD template path should be set (indicates DM-Verity is enabled) + if spec.ConfidentialOptions.DmVerityRootfsTemplatePath == "" { + t.Error("expected DM-Verity root fs VHD template path to be set in SNP mode") + } + }, + }, + { + name: "confidential options with custom files", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", // Must have policy to enable confidential mode + shimannotations.LCOWGuestStateFile: "custom.vmgs", + shimannotations.DmVerityRootFsVhd: "custom-rootfs.vhd", + shimannotations.LCOWReferenceInfoFile: "custom-ref.cose", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.ConfidentialOptions == nil { + t.Fatal("expected ConfidentialOptions to be set") + } + expectedVmgsTemplatePath := filepath.Join(validBootFilesPath, "custom.vmgs") + if spec.ConfidentialOptions.VmgsTemplatePath != expectedVmgsTemplatePath { + 
t.Errorf("expected vmgs template path %q, got %q", expectedVmgsTemplatePath, spec.ConfidentialOptions.VmgsTemplatePath) + } + expectedDmVerityRootfsTemplatePath := filepath.Join(validBootFilesPath, "custom-rootfs.vhd") + if spec.ConfidentialOptions.DmVerityRootfsTemplatePath != expectedDmVerityRootfsTemplatePath { + t.Errorf("expected custom dm-verity rootfs VHD template path %q, got %q", expectedDmVerityRootfsTemplatePath, spec.ConfidentialOptions.DmVerityRootfsTemplatePath) + } + if spec.ConfidentialOptions.UvmReferenceInfoFile != "custom-ref.cose" { + t.Errorf("expected custom reference info file, got %v", spec.ConfidentialOptions.UvmReferenceInfoFile) + } + }, + }, + { + name: "additional config - console pipe", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + iannotations.UVMConsolePipe: "\\\\.\\pipe\\console", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.AdditionalConfig.ConsolePipe != "\\\\.\\pipe\\console" { + t.Errorf("expected console pipe, got %v", spec.AdditionalConfig.ConsolePipe) + } + }, + }, + { + name: "CPU group ID", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.CPUGroupID: "12345678-1234-5678-1234-567812345678", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.CpuGroupID != "12345678-1234-5678-1234-567812345678" { + t.Errorf("expected CPU group ID, got %v", spec.CpuConfig.CpuGroupID) + } + }, + }, + { + name: "resource partition ID", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.ResourcePartitionID: "87654321-4321-8765-4321-876543218765", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.ResourcePartitionID == nil || 
*spec.ResourcePartitionID != "87654321-4321-8765-4321-876543218765" { + t.Errorf("expected resource partition ID, got %v", spec.ResourcePartitionID) + } + }, + }, + { + name: "CPU group and resource partition conflict", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.CPUGroupID: "12345678-1234-5678-1234-567812345678", + shimannotations.ResourcePartitionID: "87654321-4321-8765-4321-876543218765", + }, + wantErr: true, + errContains: "cpu_group_id and resource_partition_id cannot be set at the same time", + }, + { + name: "invalid CPU group GUID", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.CPUGroupID: "invalid-guid", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.CpuGroupID != "invalid-guid" { + t.Errorf("expected CPU group ID to be set, got %v", spec.CpuConfig.CpuGroupID) + } + }, + }, + { + name: "invalid resource partition GUID", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.ResourcePartitionID: "not-a-guid", + }, + wantErr: true, + errContains: "failed to parse resource_partition_id", + }, + { + name: "default container annotations applied", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + DefaultContainerAnnotations: map[string]string{ + shimannotations.ProcessorCount: "4", + shimannotations.MemorySizeInMB: "2048", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.ProcessorCount != 4 { + t.Errorf("expected processor count 4 from defaults, got %v", spec.CpuConfig.ProcessorCount) + } + if spec.MemoryConfig.MemorySizeInMb != 2048 { + t.Errorf("expected memory 2048MB from defaults, got 
%v", spec.MemoryConfig.MemorySizeInMb) + } + }, + }, + { + name: "annotations override default container annotations", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + DefaultContainerAnnotations: map[string]string{ + shimannotations.ProcessorCount: "2", + }, + }, + annotations: map[string]string{ + shimannotations.ProcessorCount: "8", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.ProcessorCount != 8 { + t.Errorf("expected processor count 8 (annotation overrides default), got %v", spec.CpuConfig.ProcessorCount) + } + }, + }, + { + name: "comprehensive configuration", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + VmProcessorCount: 4, + VmMemorySizeInMb: 4096, + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.ProcessorLimit: "80000", + shimannotations.ProcessorWeight: "500", + shimannotations.AllowOvercommit: "false", + shimannotations.FullyPhysicallyBacked: "true", + shimannotations.StorageQoSBandwidthMaximum: "500000", + shimannotations.StorageQoSIopsMaximum: "3000", + shimannotations.DisableWritableFileShares: "true", + shimannotations.NumaMaximumProcessorsPerNode: "2", + shimannotations.NumaMaximumMemorySizePerNode: "2048", + shimannotations.KernelDirectBoot: "true", + shimannotations.KernelBootOptions: "console=ttyS0 loglevel=7", + shimannotations.PreferredRootFSType: "vhd", + shimannotations.DisableLCOWTimeSyncService: "false", + iannotations.ExtraVSockPorts: "9000,9001", + shimannotations.VPCIEnabled: "true", + shimannotations.ContainerProcessDumpLocation: "/var/dumps", + shimannotations.DumpDirectoryPath: "C:\\UVMDumps", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Verify comprehensive configuration + if spec.CpuConfig.ProcessorCount != 4 { + t.Errorf("expected processor count 4, got %v", spec.CpuConfig.ProcessorCount) + } + if spec.MemoryConfig.MemorySizeInMb != 4096 { + 
t.Errorf("expected memory 4096MB, got %v", spec.MemoryConfig.MemorySizeInMb) + } + if spec.StorageConfig.NoWritableFileShares != true { + t.Error("expected no writable file shares") + } + if spec.BootOptions.LinuxKernelDirect == nil { + t.Error("expected kernel direct boot (LinuxKernelDirect to be set)") + } + // VPMem should be 0 due to fully physically backed + if spec.DeviceOptions.VpMemController != nil && spec.DeviceOptions.VpMemController.MaximumCount != 0 { + t.Errorf("expected VPMem count 0 (fully physically backed), got %v", spec.DeviceOptions.VpMemController.MaximumCount) + } + }, + }, + } + + runTestCases(t, ctx, nil, tests) +} + +// TestGenerateSandboxSpecs_EdgeCases tests edge cases and boundary conditions +func TestGenerateSandboxSpecs_EdgeCases(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + + tests := []specTestCase{ + { + name: "zero processor count falls back to default", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + VmProcessorCount: 0, + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.ProcessorCount <= 0 { + t.Error("expected positive processor count from default") + } + }, + }, + { + name: "negative processor count falls back to default", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.ProcessorCount: "-1", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.ProcessorCount <= 0 { + t.Error("expected positive processor count from default") + } + }, + }, + { + name: "zero memory size falls back to default 1024", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + VmMemorySizeInMb: 0, + BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.MemoryConfig.MemorySizeInMb != 1024 { + 
t.Errorf("expected default memory 1024MB, got %v", spec.MemoryConfig.MemorySizeInMb) + } + }, + }, + { + name: "VPMem size exactly 4096", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.VPMemSize: "4096", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.DeviceOptions.VpMemController == nil { + t.Error("expected VpMemController to be configured") + return + } + if spec.DeviceOptions.VpMemController.MaximumSizeBytes != 4096 { + t.Errorf("expected VPMem size 4096, got %v", spec.DeviceOptions.VpMemController.MaximumSizeBytes) + } + }, + }, + { + name: "VPMem count at maximum boundary", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.VPMemCount: "128", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.DeviceOptions.VpMemController == nil { + t.Error("expected VpMemController to be configured") + return + } + if spec.DeviceOptions.VpMemController.MaximumCount != 128 { + t.Errorf("expected VPMem count 128, got %v", spec.DeviceOptions.VpMemController.MaximumCount) + } + }, + }, + { + name: "processor limit at maximum", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.ProcessorLimit: "100000", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.ProcessorLimit != 100000 { + t.Errorf("expected processor limit 100000, got %v", spec.CpuConfig.ProcessorLimit) + } + }, + }, + { + name: "processor weight at maximum", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + }, + annotations: map[string]string{ + shimannotations.ProcessorWeight: "10000", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + 
if spec.CpuConfig.ProcessorWeight != 10000 { + t.Errorf("expected processor weight 10000, got %v", spec.CpuConfig.ProcessorWeight) + } + }, + }, + { + name: "boot files path annotation overrides options", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: "/nonexistent/path", + }, + annotations: map[string]string{ + shimannotations.BootFilesRootPath: validBootFilesPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Boot options should be set when valid boot files path is provided via annotation + if spec.BootOptions == nil { + t.Error("expected boot options to be set") + } + }, + }, + { + name: "empty annotations map with defaults", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + DefaultContainerAnnotations: map[string]string{ + shimannotations.ProcessorCount: "4", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.CpuConfig.ProcessorCount != 4 { + t.Errorf("expected processor count 4 from defaults, got %v", spec.CpuConfig.ProcessorCount) + } + }, + }, + } + + runTestCases(t, ctx, nil, tests) +} + +// TestGenerateSandboxSpecs_SecurityPolicyInteractions tests complex interactions with security policies +func TestGenerateSandboxSpecs_SecurityPolicyInteractions(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + defaultOpts := defaultSandboxOpts(validBootFilesPath) + + tests := []specTestCase{ + { + name: "security policy without hardware forces SNP mode", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + shimannotations.NoSecurityHardware: "false", + shimannotations.VPMemCount: "64", + shimannotations.AllowOvercommit: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // VPMem disabled in SNP + if spec.DeviceOptions.VpMemController != nil && spec.DeviceOptions.VpMemController.MaximumCount != 0 { + t.Error("expected 
VPMem disabled in SNP mode") + } + // Overcommit disabled in SNP + if spec.MemoryConfig.AllowOvercommit != false { + t.Error("expected overcommit disabled in SNP mode") + } + // DM-Verity enabled (check for DmVerityRootfsTemplatePath being set) + if spec.ConfidentialOptions.DmVerityRootfsTemplatePath == "" { + t.Error("expected DM-Verity root fs VHD template path to be set in SNP mode") + } + // VMGS template path should be set + if spec.ConfidentialOptions.VmgsTemplatePath == "" { + t.Error("expected VMGS template path to be set in SNP mode") + } + // Kernel boot options should be built (not empty) since buildKernelArgs runs + // In SNP mode, user-provided kernel boot options are cleared, but the + // built kernel args should still be present + if spec.BootOptions.LinuxKernelDirect != nil && spec.BootOptions.LinuxKernelDirect.KernelCmdLine == "" { + t.Error("expected kernel args to be built in SNP mode") + } + }, + }, + { + name: "security policy with hardware bypass", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + shimannotations.NoSecurityHardware: "true", + shimannotations.VPMemCount: "64", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // VPMem NOT disabled when no hardware + if spec.DeviceOptions.VpMemController == nil || spec.DeviceOptions.VpMemController.MaximumCount == 0 { + t.Error("expected VPMem NOT disabled when no security hardware") + } + }, + }, + { + name: "scratch encryption defaults to false with security policy when hardware bypassed", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + shimannotations.NoSecurityHardware: "true", // Bypasses confidential mode + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // When NoSecurityHardware is true, isConfidential is false, + // so EnableScratchEncryption defaults to false + if spec.StorageConfig.EnableScratchEncryption != false { + t.Error("expected scratch encryption 
disabled by default when security hardware is bypassed") + } + }, + }, + { + name: "scratch encryption can be disabled explicitly", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "policy", + shimannotations.LCOWEncryptedScratchDisk: "false", + shimannotations.NoSecurityHardware: "true", // Bypass SNP mode for this test + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.StorageConfig.EnableScratchEncryption != false { + t.Error("expected scratch encryption disabled when explicitly set") + } + }, + }, + { + name: "scratch encryption defaults to false without security policy", + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.StorageConfig.EnableScratchEncryption != false { + t.Error("expected scratch encryption disabled without security policy") + } + }, + }, + } + + runTestCases(t, ctx, defaultOpts, tests) +} + +func newBootFilesPath(t *testing.T) string { + t.Helper() + + tempBootDir := t.TempDir() + validBootFilesPath := filepath.Join(tempBootDir, "bootfiles") + if err := os.MkdirAll(validBootFilesPath, 0755); err != nil { + t.Fatalf("failed to create temp boot files dir: %v", err) + } + + // Create the required boot files (kernel and rootfs) + // Also create vmgs and dm-verity files for confidential tests + for _, filename := range []string{ + vmutils.KernelFile, + vmutils.UncompressedKernelFile, + vmutils.InitrdFile, + vmutils.VhdFile, + "kernel.vmgs", // defaultGuestStateFile + "rootfs.vhd", // DefaultDmVerityRootfsVhd + "custom.vmgs", // for custom files test + "custom-rootfs.vhd", // for custom files test + } { + filePath := filepath.Join(validBootFilesPath, filename) + if err := os.WriteFile(filePath, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test boot file %s: %v", filename, err) + } + } + + return validBootFilesPath +} + +func defaultSandboxOpts(bootFilesPath string) *runhcsoptions.Options { + return &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + 
BootFilesRootPath: bootFilesPath, + } +} + +// TestGenerateSandboxSpecs_ErrorPaths tests error handling paths +func TestGenerateSandboxSpecs_ErrorPaths(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + defaultOpts := defaultSandboxOpts(validBootFilesPath) + + // Create a boot files path with missing kernel file for error testing + missingKernelPath := filepath.Join(t.TempDir(), "missing_kernel") + if err := os.MkdirAll(missingKernelPath, 0755); err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + // Only create initrd, no kernel + if err := os.WriteFile(filepath.Join(missingKernelPath, vmutils.InitrdFile), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create initrd: %v", err) + } + + // Create a boot files path with missing initrd for error testing + missingInitrdPath := filepath.Join(t.TempDir(), "missing_initrd") + if err := os.MkdirAll(missingInitrdPath, 0755); err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + // Only create kernel, no initrd + if err := os.WriteFile(filepath.Join(missingInitrdPath, vmutils.KernelFile), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create kernel: %v", err) + } + + // Create a boot files path for confidential VM error testing (missing VMGS) + missingVMGSPath := filepath.Join(t.TempDir(), "missing_vmgs") + if err := os.MkdirAll(missingVMGSPath, 0755); err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + + // Create a boot files path for confidential VM error testing (missing dm-verity VHD) + missingDmVerityPath := filepath.Join(t.TempDir(), "missing_dmverity") + if err := os.MkdirAll(missingDmVerityPath, 0755); err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + // Create VMGS but not dm-verity VHD + if err := os.WriteFile(filepath.Join(missingDmVerityPath, vmutils.DefaultGuestStateFile), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create vmgs: %v", err) + } + + tests := []specTestCase{ 
+ { + name: "processAnnotations error - unsupported NetworkConfigProxy annotation", + annotations: map[string]string{ + shimannotations.NetworkConfigProxy: "some-proxy", + }, + wantErr: true, + errContains: "annotation is not supported", + }, + { + name: "kernel file not found in boot files path", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: missingKernelPath, + }, + wantErr: true, + errContains: "kernel file", + }, + { + name: "initrd file not found when preferred", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: missingInitrdPath, + }, + annotations: map[string]string{ + shimannotations.PreferredRootFSType: "initrd", + }, + wantErr: true, + errContains: "not found in boot files path", + }, + { + name: "deferred commit not supported with physical backing", + annotations: map[string]string{ + shimannotations.EnableDeferredCommit: "true", + shimannotations.FullyPhysicallyBacked: "true", + }, + wantErr: true, + errContains: "enable_deferred_commit is not supported on physically backed vms", + }, + { + name: "NUMA configuration requires physical backing", + annotations: map[string]string{ + shimannotations.NumaMaximumProcessorsPerNode: "4", + shimannotations.NumaMaximumMemorySizePerNode: "2048", + shimannotations.AllowOvercommit: "true", + }, + wantErr: true, + errContains: "vNUMA supports only Physical memory backing type", + }, + { + name: "confidential VM missing guest state file", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: missingVMGSPath, + }, + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + }, + wantErr: true, + errContains: "GuestState vmgs file", + }, + { + name: "confidential VM missing dm-verity VHD", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: missingDmVerityPath, + }, + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: 
"eyJ0ZXN0IjoidGVzdCJ9", + }, + wantErr: true, + errContains: "DM Verity VHD file", + }, + { + name: "duplicate assigned devices", + devices: []specs.WindowsDevice{ + { + ID: "PCIP\\VEN_1234&DEV_5678", + IDType: "vpci-instance-id", + }, + { + ID: "PCIP\\VEN_1234&DEV_5678", + IDType: "vpci-instance-id", + }, + }, + wantErr: true, + errContains: "specified multiple times", + }, + { + name: "duplicate assigned devices with same function index", + devices: []specs.WindowsDevice{ + { + ID: "PCIP\\VEN_1234&DEV_5678/1", + IDType: "vpci-instance-id", + }, + { + ID: "PCIP\\VEN_1234&DEV_5678/1", + IDType: "vpci-instance-id", + }, + }, + wantErr: true, + errContains: "specified multiple times", + }, + { + name: "invalid console pipe path", + annotations: map[string]string{ + iannotations.UVMConsolePipe: "/invalid/path", + }, + wantErr: true, + errContains: "listener for serial console is not a named pipe", + }, + } + + runTestCases(t, ctx, defaultOpts, tests) +} + +// TestGenerateSandboxSpecs_BootOptions tests various boot option scenarios +func TestGenerateSandboxSpecs_BootOptions(t *testing.T) { + ctx := context.Background() + + // Create various boot file configurations for testing + tempDir := t.TempDir() + + // Configuration 1: Only VHD, no initrd + vhdOnlyPath := filepath.Join(tempDir, "vhd_only") + if err := os.MkdirAll(vhdOnlyPath, 0755); err != nil { + t.Fatalf("failed to create vhd only dir: %v", err) + } + for _, f := range []string{vmutils.KernelFile, vmutils.UncompressedKernelFile, vmutils.VhdFile} { + if err := os.WriteFile(filepath.Join(vhdOnlyPath, f), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create file %s: %v", f, err) + } + } + + // Configuration 2: Only initrd, no VHD + initrdOnlyPath := filepath.Join(tempDir, "initrd_only") + if err := os.MkdirAll(initrdOnlyPath, 0755); err != nil { + t.Fatalf("failed to create initrd only dir: %v", err) + } + for _, f := range []string{vmutils.KernelFile, vmutils.InitrdFile} { + if err := 
os.WriteFile(filepath.Join(initrdOnlyPath, f), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create file %s: %v", f, err) + } + } + + // Configuration 3: Only uncompressed kernel for kernel direct + uncompressedOnlyPath := filepath.Join(tempDir, "uncompressed_only") + if err := os.MkdirAll(uncompressedOnlyPath, 0755); err != nil { + t.Fatalf("failed to create uncompressed only dir: %v", err) + } + for _, f := range []string{vmutils.UncompressedKernelFile, vmutils.InitrdFile, vmutils.VhdFile} { + if err := os.WriteFile(filepath.Join(uncompressedOnlyPath, f), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create file %s: %v", f, err) + } + } + + // Configuration 5: No kernel direct support (only kernel, no vmlinux) + noKernelDirectPath := filepath.Join(tempDir, "no_kernel_direct") + if err := os.MkdirAll(noKernelDirectPath, 0755); err != nil { + t.Fatalf("failed to create no kernel direct dir: %v", err) + } + for _, f := range []string{vmutils.KernelFile, vmutils.InitrdFile} { + if err := os.WriteFile(filepath.Join(noKernelDirectPath, f), []byte("test"), 0644); err != nil { + t.Fatalf("failed to create file %s: %v", f, err) + } + } + + tests := []specTestCase{ + { + name: "boot with VHD only (no initrd)", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Should boot from VHD + if spec.BootOptions.LinuxKernelDirect != nil { + if spec.BootOptions.LinuxKernelDirect.InitRdPath != "" { + t.Error("expected InitRdPath to be empty when VHD is default") + } + } + }, + }, + { + name: "boot with initrd only (no VHD)", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: initrdOnlyPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Should boot from initrd + if spec.BootOptions.LinuxKernelDirect != nil { + if spec.BootOptions.LinuxKernelDirect.InitRdPath == "" { + t.Error("expected 
InitRdPath to be set when only initrd is present") + } + } + }, + }, + { + name: "kernel direct with only uncompressed kernel", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: uncompressedOnlyPath, + }, + annotations: map[string]string{ + shimannotations.KernelDirectBoot: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.BootOptions.LinuxKernelDirect == nil { + t.Fatal("expected LinuxKernelDirect to be set") + } + if !strings.Contains(spec.BootOptions.LinuxKernelDirect.KernelFilePath, vmutils.UncompressedKernelFile) { + t.Errorf("expected kernel path to contain %s, got %s", vmutils.UncompressedKernelFile, spec.BootOptions.LinuxKernelDirect.KernelFilePath) + } + }, + }, + { + name: "kernel direct with regular kernel (no uncompressed)", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: noKernelDirectPath, + }, + annotations: map[string]string{ + shimannotations.KernelDirectBoot: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.BootOptions.LinuxKernelDirect == nil { + t.Fatal("expected LinuxKernelDirect to be set") + } + if !strings.Contains(spec.BootOptions.LinuxKernelDirect.KernelFilePath, vmutils.KernelFile) { + t.Errorf("expected kernel path to contain %s", vmutils.KernelFile) + } + }, + }, + { + name: "UEFI boot mode (kernel direct disabled)", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: initrdOnlyPath, + }, + annotations: map[string]string{ + shimannotations.KernelDirectBoot: "false", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.BootOptions.Uefi == nil { + t.Error("expected UEFI boot mode") + } + if spec.BootOptions.LinuxKernelDirect != nil { + t.Error("expected LinuxKernelDirect to be nil for UEFI boot") + } + }, + }, + { + name: "dm-verity mode with SCSI boot", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: 
vhdOnlyPath, + }, + annotations: map[string]string{ + shimannotations.DmVerityMode: "true", + shimannotations.DmVerityCreateArgs: "test-dm-verity-args", + shimannotations.VPMemCount: "0", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if !strings.Contains(getKernelArgs(spec), "dm-mod.create") { + t.Error("expected dm-verity configuration in kernel args") + } + }, + }, + { + name: "SCSI boot from VHD without dm-verity", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + annotations: map[string]string{ + shimannotations.VPMemCount: "0", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Should boot from SCSI + if spec.DeviceOptions.ScsiControllers == nil || len(spec.DeviceOptions.ScsiControllers) == 0 { + t.Error("expected SCSI controllers to be configured") + } + }, + }, + { + name: "VPMem boot from VHD", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + annotations: map[string]string{ + shimannotations.VPMemCount: "32", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // VPMem should be configured for VHD + if spec.DeviceOptions.VpMemController == nil { + t.Fatal("expected VpMemController to be configured") + } + if len(spec.DeviceOptions.VpMemController.Devices) == 0 { + t.Error("expected VPMem device to be configured for rootfs") + } + }, + }, + { + name: "writable overlay dirs with VHD", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + annotations: map[string]string{ + iannotations.WritableOverlayDirs: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if !strings.Contains(getKernelArgs(spec), "-w") { + t.Error("expected -w flag in kernel args for writable overlay dirs") + } + }, + }, + { + name: "disable time sync service", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + 
}, + annotations: map[string]string{ + shimannotations.DisableLCOWTimeSyncService: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if !strings.Contains(getKernelArgs(spec), "-disable-time-sync") { + t.Error("expected -disable-time-sync flag in kernel args") + } + }, + }, + { + name: "process dump location", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + annotations: map[string]string{ + shimannotations.ContainerProcessDumpLocation: "/tmp/dumps", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + kernelArgs := getKernelArgs(spec) + if !strings.Contains(kernelArgs, "-core-dump-location") || !strings.Contains(kernelArgs, "/tmp/dumps") { + t.Error("expected -core-dump-location in kernel args") + } + }, + }, + { + name: "VPCIEnabled annotation enables PCI", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + annotations: map[string]string{ + shimannotations.VPCIEnabled: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Verify kernel args do NOT contain "pci=off" + if strings.Contains(getKernelArgs(spec), "pci=off") { + t.Error("expected PCI to be enabled, but found pci=off in kernel args") + } + }, + }, + { + name: "VPCIEnabled defaults to false (PCI disabled)", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Verify kernel args contain "pci=off" + if !strings.Contains(getKernelArgs(spec), "pci=off") { + t.Error("expected pci=off in kernel args when VPCIEnabled is false") + } + }, + }, + { + name: "gcs log level from options", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + LogLevel: "debug", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if !strings.Contains(getKernelArgs(spec), "-loglevel debug") { + 
t.Error("expected -loglevel debug in kernel args") + } + }, + }, + { + name: "scrub logs option", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: vhdOnlyPath, + ScrubLogs: true, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if !strings.Contains(getKernelArgs(spec), "-scrub-logs") { + t.Error("expected -scrub-logs in kernel args") + } + }, + }, + } + + runTestCases(t, ctx, nil, tests) +} + +// TestGenerateSandboxSpecs_DeviceOptions tests various device option scenarios +func TestGenerateSandboxSpecs_DeviceOptions(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + defaultOpts := defaultSandboxOpts(validBootFilesPath) + + tests := []specTestCase{ + { + name: "assigned devices with unsupported type", + devices: []specs.WindowsDevice{ + { + ID: "some-device-id", + IDType: "unsupported-type", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // Unsupported device types should be skipped + if len(spec.DeviceOptions.VpciDevices) != 0 { + t.Errorf("expected 0 vpci devices (unsupported type should be skipped), got %d", len(spec.DeviceOptions.VpciDevices)) + } + }, + }, + { + name: "assigned devices - legacy vpci type", + devices: []specs.WindowsDevice{ + { + ID: "PCIP\\VEN_ABCD&DEV_1234", + IDType: "vpci", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if len(spec.DeviceOptions.VpciDevices) != 1 { + t.Errorf("expected 1 assigned device, got %d", len(spec.DeviceOptions.VpciDevices)) + } + }, + }, + { + name: "confidential VM disables vPCI devices", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + }, + devices: []specs.WindowsDevice{ + { + ID: "PCIP\\VEN_1234&DEV_5678", + IDType: "vpci-instance-id", + }, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // vPCI devices should be disabled in confidential VMs + if len(spec.DeviceOptions.VpciDevices) != 0 { + t.Error("expected 
vPCI devices to be disabled in confidential VMs") + } + }, + }, + } + + runTestCases(t, ctx, defaultOpts, tests) +} + +// TestGenerateSandboxSpecs_HvSocketServiceTable tests HvSocket service table parsing +func TestGenerateSandboxSpecs_HvSocketServiceTable(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + defaultOpts := defaultSandboxOpts(validBootFilesPath) + + tests := []specTestCase{ + { + name: "HvSocket service table parsing from annotations", + annotations: map[string]string{ + iannotations.UVMHyperVSocketConfigPrefix + "12345678-1234-1234-1234-123456789abc": `{"BindSecurityDescriptor":"D:P(A;;FA;;;WD)","ConnectSecurityDescriptor":"D:P(A;;FA;;;WD)","AllowWildcardBinds":true}`, + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // HvSocket service table should be parsed + if spec.AdditionalConfig.AdditionalHypervConfig == nil { + t.Error("expected AdditionalHypervConfig to be set") + } + // Check that the service was added to the table + found := false + for guid := range spec.AdditionalConfig.AdditionalHypervConfig { + if strings.Contains(strings.ToLower(guid), "12345678-1234-1234-1234-123456789abc") { + found = true + break + } + } + if !found { + t.Error("expected HvSocket service GUID to be in AdditionalHypervConfig") + } + }, + }, + { + name: "confidential VM adds extra vsock ports", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + iannotations.ExtraVSockPorts: "8000,8001,8002", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + // In confidential mode, extra vsock ports should be added to HvSocket service table + if spec.AdditionalConfig.AdditionalHypervConfig == nil { + t.Fatal("expected AdditionalHypervConfig to be set") + } + // Should contain at least the default ports plus extra ports + minExpectedPorts := 4 + 3 // 4 default (entropy, log, gcs, bridge) + 3 extra + if len(spec.AdditionalConfig.AdditionalHypervConfig) < 
minExpectedPorts { + t.Errorf("expected at least %d vsock ports, got %d", minExpectedPorts, len(spec.AdditionalConfig.AdditionalHypervConfig)) + } + }, + }, + { + name: "confidential VM HvSocket default ports", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.AdditionalConfig.AdditionalHypervConfig == nil { + t.Fatal("expected AdditionalHypervConfig to be set") + } + // Should contain default ports: entropy (1), log (109), gcs (0x40000000), bridge (0x40000001) + if len(spec.AdditionalConfig.AdditionalHypervConfig) < 4 { + t.Errorf("expected at least 4 default vsock ports, got %d", len(spec.AdditionalConfig.AdditionalHypervConfig)) + } + }, + }, + { + name: "confidential VM with HclEnabled annotation", + annotations: map[string]string{ + shimannotations.LCOWSecurityPolicy: "eyJ0ZXN0IjoidGVzdCJ9", + shimannotations.LCOWHclEnabled: "true", + }, + validate: func(t *testing.T, spec *shimsandbox.Spec) { + if spec.ConfidentialOptions == nil || spec.ConfidentialOptions.SecuritySettings == nil { + t.Fatal("expected SecuritySettings to be set") + } + if spec.ConfidentialOptions.SecuritySettings.Isolation == nil { + t.Fatal("expected Isolation settings to be set") + } + if spec.ConfidentialOptions.SecuritySettings.Isolation.HclEnabled == nil { + t.Fatal("expected HclEnabled to be set") + } + if !*spec.ConfidentialOptions.SecuritySettings.Isolation.HclEnabled { + t.Error("expected HclEnabled to be true") + } + // VMGS template path should be set in confidential mode + if spec.ConfidentialOptions.VmgsTemplatePath == "" { + t.Error("expected VMGS template path to be set in confidential mode") + } + }, + }, + } + + runTestCases(t, ctx, defaultOpts, tests) +} diff --git a/cmd/containerd-shim-lcow-v1/specs/utils.go b/cmd/containerd-shim-lcow-v1/specs/utils.go new file mode 100644 index 0000000000..84d830caca --- /dev/null +++ 
b/cmd/containerd-shim-lcow-v1/specs/utils.go @@ -0,0 +1,134 @@ +//go:build windows + +package specs + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + shimsandbox "github.com/Microsoft/hcsshim/api/sandbox/v1" + "github.com/Microsoft/hcsshim/internal/copyfile" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" + "github.com/Microsoft/hcsshim/internal/security" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + + "github.com/sirupsen/logrus" +) + +// EnrichConfidentialSpecs enriches the sandbox specification with confidential computing configuration. +// It prepares the necessary files and device attachments required for running a confidential LCOW VM. +// +// This function performs the following operations: +// - Copies the VMGS (Virtual Machine Guest State) template file to the bundle directory +// - Copies the DM-Verity protected rootfs VHD to the bundle directory +// - Grants VM group access permissions to the copied files +// - Configures SCSI controller and attaches the dm-verity rootfs as a virtual disk +// - Sets up the guest state configuration for the confidential VM +// +// EnrichConfidentialSpecs would be called during the SandboxCreate API call in the running shim process. +func EnrichConfidentialSpecs(ctx context.Context, bundlePath string, spec *shimsandbox.Spec) (err error) { + specJSON, _ := json.Marshal(spec) + log.G(ctx).WithFields(logrus.Fields{ + "bundlePath": bundlePath, + "spec": string(specJSON), + }).Debug("EnrichConfidentialSpecs: enriching confidential sandbox specification") + + // Step 1: Copy VMGS template file. 
+ vmgsFileName := filepath.Base(spec.ConfidentialOptions.VmgsTemplatePath) + vmgsFileFullPath := filepath.Join(bundlePath, vmgsFileName) + + log.G(ctx).WithFields(logrus.Fields{ + "source": spec.ConfidentialOptions.VmgsTemplatePath, + "dest": vmgsFileFullPath, + }).Debug("copying VMGS template file") + + // Copy the VMGS template to the bundle directory. + if err := copyfile.CopyFile(ctx, spec.ConfidentialOptions.VmgsTemplatePath, vmgsFileFullPath, true); err != nil { + return fmt.Errorf("failed to copy VMGS template file: %w", err) + } + // Clean up VMGS file if we encounter an error later. + defer func() { + if err != nil { + os.Remove(vmgsFileFullPath) + } + }() + + // Step 2: Copy DM-Verity rootfs template file. + dmVerityRootFsFullPath := filepath.Join(bundlePath, vmutils.DefaultDmVerityRootfsVhd) + + log.G(ctx).WithFields(logrus.Fields{ + "source": spec.ConfidentialOptions.DmVerityRootfsTemplatePath, + "dest": dmVerityRootFsFullPath, + }).Debug("copying DM-Verity rootfs template file") + + // Copy the DM-Verity rootfs template to the bundle directory. + if err := copyfile.CopyFile(ctx, spec.ConfidentialOptions.DmVerityRootfsTemplatePath, dmVerityRootFsFullPath, true); err != nil { + return fmt.Errorf("failed to copy DM Verity rootfs template file: %w", err) + } + // Clean up DM-Verity file if we encounter an error later. + defer func() { + if err != nil { + os.Remove(dmVerityRootFsFullPath) + } + }() + + // Step 3: Grant VM group access to the copied files + // Both files need to be accessible by the VM group for the confidential VM to use them. + log.G(ctx).Debug("granting VM group access to confidential files") + for _, filename := range []string{ + vmgsFileFullPath, + dmVerityRootFsFullPath, + } { + if err := security.GrantVmGroupAccessWithMask(filename, security.AccessMaskAll); err != nil { + return fmt.Errorf("failed to grant VM group access ALL: %w", err) + } + } + + // Step 4: Initialize device options and SCSI controllers. 
+ // Initialize SCSI controllers if not already present. + if spec.DeviceOptions == nil { + spec.DeviceOptions = &shimsandbox.DeviceOptions{} + } + if spec.DeviceOptions.ScsiControllers == nil { + spec.DeviceOptions.ScsiControllers = make(map[string]*shimsandbox.ScsiController) + } + + // Step 5: Configure SCSI controller 0 + // Ensure SCSI controller 0 exists (using the first GUID from the predefined list). + scsiController0 := guestrequest.ScsiControllerGuids[0] + if spec.DeviceOptions.ScsiControllers[scsiController0] == nil { + spec.DeviceOptions.ScsiControllers[scsiController0] = &shimsandbox.ScsiController{ + Attachments: make(map[string]*shimsandbox.ScsiAttachment), + } + } + + // Step 6: Attach the dm-verity rootfs VHD to SCSI controller 0, LUN 0 + // This makes the verified rootfs available to the guest VM as a read-only disk + spec.DeviceOptions.ScsiControllers[scsiController0].Attachments["0"] = &shimsandbox.ScsiAttachment{ + Type: "VirtualDisk", + Path: dmVerityRootFsFullPath, + ReadOnly: true, // Read-only to ensure integrity of the verified rootfs + } + + // Log the SCSI configuration for debugging purposes + log.G(ctx).WithFields(logrus.Fields{ + "controller": scsiController0, + "lun": "0", + "path": dmVerityRootFsFullPath, + }).Debug("configured SCSI attachment for dm-verity rootfs in confidential mode") + + // Step 7: Configure guest state for the confidential VM + // Set up the VMGS file as the source for guest state + spec.ConfidentialOptions.GuestState = &shimsandbox.GuestState{ + GuestStateFilePath: vmgsFileFullPath, + GuestStateFileType: "FileMode", + ForceTransientState: true, // Tell HCS that this is just the source of the images, not ongoing state + } + + log.G(ctx).Info("confidential sandbox specification enriched successfully") + return nil +}