From 4ec7a5dd8c1bfac999a4c89e86c7947cda439e55 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 23 Apr 2026 23:03:24 -0700 Subject: [PATCH 01/11] feat(vm): derive guest rootfs from sandbox images Signed-off-by: Drew Newberry --- .github/workflows/release-vm-dev.yml | 45 +- Cargo.lock | 444 +++++++- architecture/custom-vm-runtime.md | 68 +- architecture/gateway.md | 4 +- crates/openshell-bootstrap/Cargo.toml | 1 + crates/openshell-bootstrap/src/build.rs | 206 +++- crates/openshell-cli/src/run.rs | 102 +- crates/openshell-driver-vm/Cargo.toml | 4 + crates/openshell-driver-vm/README.md | 40 +- crates/openshell-driver-vm/build.rs | 47 +- crates/openshell-driver-vm/src/driver.rs | 1033 ++++++++++++++++++- crates/openshell-driver-vm/src/main.rs | 4 + crates/openshell-driver-vm/src/rootfs.rs | 271 ++++- crates/openshell-driver-vm/start.sh | 204 +++- crates/openshell-server/src/cli.rs | 1 + crates/openshell-server/src/compute/vm.rs | 7 + deploy/docker/Dockerfile.driver-vm-macos | 2 +- docs/get-started/quickstart.mdx | 7 + docs/sandboxes/community-sandboxes.mdx | 25 +- tasks/scripts/vm/build-supervisor-bundle.sh | 100 ++ tasks/scripts/vm/vm-setup.sh | 7 +- tasks/vm.toml | 5 + 22 files changed, 2382 insertions(+), 245 deletions(-) create mode 100755 tasks/scripts/vm/build-supervisor-bundle.sh diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index 85750b59f..0529aa4f7 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -421,7 +421,7 @@ jobs: # --------------------------------------------------------------------------- build-driver-vm-linux: name: Build Driver VM (Linux ${{ matrix.arch }}) - needs: [compute-versions, download-kernel-runtime, build-rootfs] + needs: [compute-versions, download-kernel-runtime] strategy: matrix: include: @@ -477,12 +477,6 @@ jobs: name: kernel-runtime-tarballs path: runtime-download/ - - name: Download rootfs tarball - uses: actions/download-artifact@v4 - 
with: - name: rootfs-${{ matrix.arch }} - path: rootfs-download/ - - name: Stage compressed runtime for embedding run: | set -euo pipefail @@ -504,12 +498,15 @@ jobs: zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" done - # Copy rootfs tarball (already zstd-compressed) - cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" - echo "Staged compressed artifacts:" ls -lah "$COMPRESSED_DIR" + - name: Build bundled supervisor + run: | + set -euo pipefail + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" \ + tasks/scripts/vm/build-supervisor-bundle.sh --arch "${{ matrix.guest_arch }}" + - name: Scope workspace to driver-vm crates run: | set -euo pipefail @@ -551,7 +548,7 @@ jobs: # --------------------------------------------------------------------------- build-driver-vm-macos: name: Build Driver VM (macOS) - needs: [compute-versions, download-kernel-runtime, build-rootfs] + needs: [compute-versions, download-kernel-runtime] runs-on: build-amd64 timeout-minutes: 60 container: @@ -591,12 +588,6 @@ jobs: name: kernel-runtime-tarballs path: runtime-download/ - - name: Download rootfs tarball (arm64) - uses: actions/download-artifact@v4 - with: - name: rootfs-arm64 - path: rootfs-download/ - - name: Prepare compressed runtime directory run: | set -euo pipefail @@ -619,12 +610,24 @@ jobs: zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" done - # The macOS VM guest is always Linux ARM64, so use the arm64 rootfs - cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" - echo "Staged macOS compressed artifacts:" ls -lah "$COMPRESSED_DIR" + - name: Build bundled supervisor + run: | + set -euo pipefail + docker buildx build \ + --file deploy/docker/Dockerfile.images \ + --platform linux/arm64 \ + --build-arg OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" \ + --build-arg OPENSHELL_IMAGE_TAG=dev \ + --target supervisor-output \ + --output type=local,dest=supervisor-out/ 
\ + . + + zstd -19 -T0 -f supervisor-out/openshell-sandbox \ + -o "${PWD}/target/vm-runtime-compressed-macos/openshell-sandbox.zst" + - name: Build macOS binary via Docker (osxcross) run: | set -euo pipefail @@ -776,7 +779,7 @@ jobs: ### VM Compute Driver Binaries - `openshell-driver-vm` binaries with embedded kernel runtime and sandbox rootfs. + `openshell-driver-vm` binaries with embedded kernel runtime and bundled sandbox supervisor. Launched by the gateway when `--drivers=vm` is configured. Rebuilt on every push to main alongside the openshell-vm binaries. diff --git a/Cargo.lock b/Cargo.lock index 0e59eb64f..f93440a39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -621,6 +621,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cexpr" version = "0.6.0" @@ -761,6 +767,16 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "compact_str" version = "0.7.1" @@ -808,6 +824,27 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" +[[package]] +name = "const_format" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e" +dependencies = [ + "const_format_proc_macros", + "konst", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -1175,6 +1212,37 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + [[package]] name = "dialoguer" version = "0.11.0" @@ -1648,6 +1716,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "ghash" version = "0.5.1" @@ -1861,6 +1941,15 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-auth" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "150fa4a9462ef926824cf4519c84ed652ca8f4fbae34cb8af045b5cbcaf98822" +dependencies = [ + "memchr", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2324,6 +2413,50 @@ version = "1.0.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if", + "combine", + "jni-sys 0.3.1", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", +] + +[[package]] +name = "jni-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -2372,6 +2505,20 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "jsonwebtoken" +version = "10.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" +dependencies = [ + "base64 0.22.1", + "getrandom 0.2.17", + "js-sys", + "serde", + "serde_json", + "signature 2.2.0", +] + [[package]] name = "k8s-openapi" version = "0.21.1" @@ -2385,6 +2532,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "konst" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb" +dependencies = [ + "konst_macro_rules", +] + +[[package]] 
+name = "konst_macro_rules" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" + [[package]] name = "kube" version = "0.90.0" @@ -3035,6 +3197,60 @@ dependencies = [ "memchr", ] +[[package]] +name = "oci-client" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b7f8deaffcd3b0e3baf93dddcab3d18b91d46dc37d38a8b170089b234de5bb3" +dependencies = [ + "bytes", + "chrono", + "futures-util", + "http", + "http-auth", + "jsonwebtoken", + "lazy_static", + "oci-spec", + "olpc-cjson", + "regex", + "reqwest 0.13.2", + "serde", + "serde_json", + "sha2 0.10.9", + "thiserror 2.0.18", + "tokio", + "tracing", + "unicase", +] + +[[package]] +name = "oci-spec" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8445a2631507cec628a15fdd6154b54a3ab3f20ed4fe9d73a3b8b7a4e1ba03a" +dependencies = [ + "const_format", + "derive_builder", + "getset", + "regex", + "serde", + "serde_json", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.18", +] + +[[package]] +name = "olpc-cjson" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "696183c9b5fe81a7715d074fd632e8bd46f4ccc0231a3ed7fc580a80de5f7083" +dependencies = [ + "serde", + "serde_json", + "unicode-normalization", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -3071,6 +3287,7 @@ dependencies = [ "tempfile", "tokio", "tracing", + "url", ] [[package]] @@ -3100,7 +3317,7 @@ dependencies = [ "owo-colors", "prost-types", "rcgen", - "reqwest", + "reqwest 0.12.28", "rustls", "rustls-pemfile", "serde", @@ -3204,17 +3421,21 @@ name = "openshell-driver-vm" version = "0.0.0" dependencies = [ "clap", + "flate2", "futures", "libc", "libloading", "miette", "nix", + "oci-client", + "openshell-bootstrap", "openshell-core", "openshell-vfio", "polling", "prost-types", "serde", 
"serde_json", + "sha2 0.10.9", "tar", "tokio", "tokio-stream", @@ -3274,7 +3495,7 @@ version = "0.0.0" dependencies = [ "bytes", "openshell-core", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serde_yml", @@ -3366,7 +3587,7 @@ dependencies = [ "prost-types", "rand 0.9.4", "rcgen", - "reqwest", + "reqwest 0.12.28", "russh", "rustls", "rustls-pemfile", @@ -4024,6 +4245,7 @@ version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -4162,7 +4384,7 @@ dependencies = [ "lru", "paste", "stability", - "strum", + "strum 0.26.3", "unicode-segmentation", "unicode-truncate", "unicode-width 0.1.14", @@ -4297,6 +4519,47 @@ dependencies = [ "webpki-roots 1.0.7", ] +[[package]] +name = "reqwest" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "rustls-platform-verifier", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tokio-util", + "tower 0.5.3", + "tower-http 0.6.8", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", +] + [[package]] name = "rfc6979" version = "0.4.0" @@ -4500,6 +4763,7 @@ version = "0.23.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" dependencies = [ + "aws-lc-rs", "log", "once_cell", "ring", @@ -4540,12 +4804,40 @@ dependencies = [ "zeroize", ] +[[package]] +name = 
"rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.103.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted 0.9.0", @@ -4572,6 +4864,15 @@ dependencies = [ "cipher", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.29" @@ -5283,9 +5584,15 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros", + "strum_macros 0.26.4", ] +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" + [[package]] name = "strum_macros" version = "0.26.4" @@ -5299,6 +5606,18 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "subtle" version = "2.6.1" @@ -5951,6 +6270,12 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + [[package]] name = "unicode-bidi" version = "0.3.18" @@ -6106,6 +6431,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -6222,6 +6557,19 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -6254,6 +6602,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -6298,6 +6655,15 @@ version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -6405,6 +6771,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -6450,6 +6825,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -6507,6 +6897,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -6525,6 +6921,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -6543,6 +6945,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -6573,6 +6981,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -6591,6 +7005,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -6609,6 +7029,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" 
version = "0.48.5" @@ -6627,6 +7053,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -6836,7 +7268,7 @@ dependencies = [ "bindgen", "cmake", "pkg-config", - "reqwest", + "reqwest 0.12.28", "serde_json", "zip", ] diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index 045ee2e9a..18594d4af 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -20,8 +20,9 @@ kernel. The driver is spawned by `openshell-gateway` as a subprocess, talks to it over a Unix domain socket (`compute-driver.sock`) with the `openshell.compute.v1.ComputeDriver` gRPC surface, and manages per-sandbox -microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox rootfs are -embedded directly in the driver binary — no sibling files required at runtime. +microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox +supervisor are embedded directly in the driver binary; each sandbox guest +rootfs is derived from a container image at create time. ## Architecture @@ -30,7 +31,7 @@ graph TD subgraph Host["Host (macOS / Linux)"] GATEWAY["openshell-gateway
(compute::vm::spawn)"] DRIVER["openshell-driver-vm
(compute-driver.sock)"] - EMB["Embedded runtime (zstd)
libkrun · libkrunfw · gvproxy
+ sandbox rootfs.tar.zst"] + EMB["Embedded runtime (zstd)
libkrun · libkrunfw · gvproxy
+ openshell-sandbox.zst"] GVP["gvproxy (per sandbox)
virtio-net · DHCP · DNS"] GATEWAY <-->|gRPC over UDS| DRIVER @@ -58,8 +59,8 @@ never binds a host-side TCP listener. ## Embedded Runtime -`openshell-driver-vm` embeds the VM runtime libraries and the sandbox rootfs as -zstd-compressed byte arrays, extracting on demand: +`openshell-driver-vm` embeds the VM runtime libraries and the sandbox +supervisor as zstd-compressed byte arrays, extracting on demand: ```text ~/.local/share/openshell/vm-runtime// # libkrun / libkrunfw / gvproxy @@ -74,14 +75,17 @@ Old runtime cache versions are cleaned up when a new version is extracted. ### Sandbox rootfs preparation -The rootfs tarball the driver embeds starts from the same minimal Ubuntu base -used across the project, and is **rewritten into a supervisor-only sandbox -guest** during extraction: +Each VM sandbox starts from either a registry image fetched directly over OCI or +a local rootfs tar artifact exported by the CLI for Dockerfile-based `--from` +sources, then the driver **rewrites that filesystem into a supervisor-only +sandbox guest** before caching it: -- k3s state and Kubernetes manifests are stripped out - `/srv/openshell-vm-sandbox-init.sh` is installed as the guest entrypoint -- the guest boots directly into `openshell-sandbox` — no k3s, no kube-proxy, - no CNI plugins +- the bundled `openshell-sandbox` binary is copied into + `/opt/openshell/bin/openshell-sandbox` +- k3s state and Kubernetes manifests are stripped out if the image contains them +- the guest boots directly into `openshell-sandbox` — no k3s, no kube-proxy, no + CNI plugins See `crates/openshell-driver-vm/src/rootfs.rs` for the rewrite logic and `crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh` for the init @@ -95,6 +99,44 @@ spawns one launcher per sandbox as a subprocess, which in turn starts `gvproxy` and calls `krun_start_enter` to boot the guest. Keeping the launcher in the same binary means the driver ships a single artifact for both roles. 
+When a sandbox sets `template.image` through `openshell sandbox create --from ...`, +the VM driver treats that image as the base guest rootfs source for that +sandbox. When `template.image` is omitted, the gateway fills it from the VM +driver's advertised `default_image`, which matches the gateway's configured +sandbox image. The driver: + +- resolves the image on the gateway host without Docker for registry and + community image refs +- for local Dockerfile sources, the CLI builds through the host Docker socket + and hands the VM driver a local rootfs tar artifact instead of a Docker tag +- unpacks the image filesystem, injects the VM sandbox init/supervisor files, + and validates required guest tools such as `bash`, `mount`, `ip`, and `sed` +- caches the prepared guest rootfs under + `/images//rootfs.tar` +- extracts a private runtime copy under + `/sandboxes//rootfs` + +The cache key uses an immutable image identity: repo digest when available, +otherwise a SHA-256 fingerprint of the local rootfs tar artifact. +Different VM sandboxes can use different base images concurrently because the +shared cache is per image, not global for the driver. Cached prepared rootfs +entries remain on disk until the operator removes them from the VM driver state +directory. + +Docker is therefore no longer required for VM sandboxes created from registry or +community image refs. It is only required on the CLI host when the source is a +local Dockerfile or build context. + +There is no embedded guest rootfs fallback anymore. VM sandboxes therefore +require either `template.image` or a configured default sandbox image. This is +still replace-the-rootfs semantics, so VM images must remain base-compatible +with the sandbox guest init path. Distroless or `scratch` images are not +expected to work. + +The separate `openshell-vm` binary still uses `vm:rootfs` to build a standalone +embedded guest filesystem, but `openshell-driver-vm` no longer consumes that +artifact. 
+ ## Network Plane The driver launches a **dedicated `gvproxy` instance per sandbox** to provide the @@ -262,8 +304,8 @@ host platform. ### Driver Binary (`release-vm-dev.yml`) Builds the self-contained `openshell-driver-vm` binary for every platform, -with the kernel runtime + sandbox rootfs embedded. Runs on every push to -`main` that touches VM-related crates. +with the kernel runtime + bundled sandbox supervisor embedded. Runs on every +push to `main` that touches VM-related crates. The `download-kernel-runtime` job pulls the current `vm-runtime-.tar.zst` from the `vm-dev` release; the `build-openshell-driver-vm` jobs set diff --git a/architecture/gateway.md b/architecture/gateway.md index e83640a43..5392b640a 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -135,7 +135,7 @@ All configuration is via CLI flags with environment variable fallbacks. The `--d | `--sandbox-image` | `OPENSHELL_SANDBOX_IMAGE` | None | Default container image for sandbox pods | | `--grpc-endpoint` | `OPENSHELL_GRPC_ENDPOINT` | None | gRPC endpoint reachable from within the cluster (for supervisor callbacks) | | `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes`, `docker`, and `vm`. | -| `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, and runtime state | +| `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, runtime state, and shared image-rootfs cache | | `--driver-dir` | `OPENSHELL_DRIVER_DIR` | unset | Override directory for `openshell-driver-vm`. When unset, the gateway searches `~/.local/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`, then a sibling binary. 
| | `--vm-krun-log-level` | `OPENSHELL_VM_KRUN_LOG_LEVEL` | `1` | libkrun log level for VM helper processes | | `--vm-driver-vcpus` | `OPENSHELL_VM_DRIVER_VCPUS` | `2` | Default vCPU count for VM sandboxes | @@ -618,7 +618,7 @@ The Docker driver (`crates/openshell-driver-docker/src/lib.rs`) is an in-process `VmDriver` (`crates/openshell-driver-vm/src/driver.rs`) is served by the standalone `openshell-driver-vm` process. The gateway spawns that binary on demand and talks to it over the internal `openshell.compute.v1.ComputeDriver` gRPC contract via a Unix domain socket. -- **Create**: The VM driver process allocates a sandbox-specific rootfs from its own embedded `rootfs.tar.zst`, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots the supervisor. +- **Create**: The VM driver process exports the selected sandbox image from the local Docker daemon, rewrites it into a sandbox-specific guest rootfs, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots the supervisor. - **Networking**: The helper starts an embedded `gvproxy`, wires it into libkrun as virtio-net, and gives the guest outbound connectivity. No inbound TCP listener is needed — the supervisor reaches the gateway over its outbound `ConnectSupervisor` stream. - **Gateway callback**: The guest init script configures `eth0` for gvproxy networking, seeds `/etc/hosts` so `host.openshell.internal` resolves to the gvproxy gateway IP (`192.168.127.1`), preserves gvproxy's legacy `host.containers.internal` / `host.docker.internal` DNS answers, prefers the configured `OPENSHELL_GRPC_ENDPOINT`, and falls back to those aliases or the raw gateway IP when local hostname resolution is unavailable on macOS. 
- **Guest boot**: The sandbox guest runs a minimal init script that starts `openshell-sandbox` directly as PID 1 inside the VM. diff --git a/crates/openshell-bootstrap/Cargo.toml b/crates/openshell-bootstrap/Cargo.toml index 942ffc48b..30fd4fbfc 100644 --- a/crates/openshell-bootstrap/Cargo.toml +++ b/crates/openshell-bootstrap/Cargo.toml @@ -20,6 +20,7 @@ miette = { workspace = true } rcgen = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +url = { workspace = true } tar = "0.4" tempfile = "3" tokio = { workspace = true } diff --git a/crates/openshell-bootstrap/src/build.rs b/crates/openshell-bootstrap/src/build.rs index fb9b4a63d..ecc4bffc9 100644 --- a/crates/openshell-bootstrap/src/build.rs +++ b/crates/openshell-bootstrap/src/build.rs @@ -1,54 +1,117 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! Build and push container images into a k3s gateway. +//! Build and export container images for gateway runtimes. //! //! This module wraps bollard's `build_image()` API to build a container image -//! from a Dockerfile and build context, then reuses the existing push pipeline -//! to import the image into the gateway's containerd runtime. +//! from a Dockerfile and build context. Kubernetes deployments reuse the +//! existing push pipeline to import the image into the gateway's containerd +//! runtime, while the VM backend can export the built image as a rootfs tar. 
use std::collections::HashMap;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
 use bollard::Docker;
-use bollard::query_parameters::BuildImageOptionsBuilder;
+use bollard::models::ContainerCreateBody;
+use bollard::query_parameters::{
+    BuildImageOptionsBuilder, CreateContainerOptionsBuilder, RemoveContainerOptionsBuilder,
+};
 use futures::StreamExt;
 use miette::{IntoDiagnostic, Result, WrapErr};
+use tokio::io::AsyncWriteExt;
+use url::{Position, Url};
 
 use crate::constants::container_name;
 use crate::push::push_local_images;
 
-/// Build a container image from a Dockerfile and push it into the gateway.
+/// Pseudo-image URI scheme used to hand a local rootfs tar artifact to the VM driver.
+pub const ROOTFS_TAR_IMAGE_REF_SCHEME: &str = "openshell-rootfs-tar";
+
+/// Build a container image from a Dockerfile using the local Docker daemon.
 ///
-/// This is used by `openshell sandbox create --from <path>`. It:
-/// 1. Creates a tar archive of the build context directory.
-/// 2. Sends it to the local Docker daemon via `build_image()`.
-/// 3. Pushes the resulting image into the gateway's containerd via the
-///    existing `push_local_images()` pipeline.
+/// This is used by `openshell sandbox create --from <path>` for both the
+/// Kubernetes and VM backends. The image remains available in the local Docker
+/// daemon so the caller can either hand the resulting tag directly to the VM
+/// backend or import it into a local gateway containerd runtime.
 #[allow(clippy::implicit_hasher)]
-pub async fn build_and_push_image(
+pub async fn build_local_image(
     dockerfile_path: &Path,
     tag: &str,
     context_dir: &Path,
-    gateway_name: &str,
     build_args: &HashMap<String, String>,
     on_log: &mut impl FnMut(String),
 ) -> Result<()> {
-    // 1. Build the image locally.
on_log(format!(
         "Building image {tag} from {}",
         dockerfile_path.display()
     ));
     build_image(dockerfile_path, tag, context_dir, build_args, on_log).await?;
     on_log(format!("Built image {tag}"));
+    Ok(())
+}
+
+/// Encode a local rootfs tar path as an internal image reference understood by the VM driver.
+pub fn encode_rootfs_tar_image_ref(path: &Path) -> Result<String> {
+    let canonical = path
+        .canonicalize()
+        .into_diagnostic()
+        .wrap_err_with(|| format!("failed to resolve rootfs tar path {}", path.display()))?;
+    let file_url = Url::from_file_path(&canonical)
+        .map_err(|_| miette::miette!("failed to encode rootfs tar path {}", canonical.display()))?;
+    Ok(format!(
+        "{ROOTFS_TAR_IMAGE_REF_SCHEME}:{}",
+        &file_url[Position::BeforePath..]
+    ))
+}
+
+/// Decode a VM-driver rootfs tar image reference back to a local filesystem path.
+pub fn decode_rootfs_tar_image_ref(image_ref: &str) -> Option<PathBuf> {
+    let remainder = image_ref.strip_prefix(&format!("{ROOTFS_TAR_IMAGE_REF_SCHEME}:"))?;
+    let file_url = format!("file:{remainder}");
+    Url::parse(&file_url).ok()?.to_file_path().ok()
+}
+
+/// Export a locally-built Docker image as a persistent rootfs tar artifact for the VM driver.
+pub async fn export_local_image_rootfs(
+    image_ref: &str,
+    on_log: &mut impl FnMut(String),
+) -> Result<PathBuf> {
+    let temp = tempfile::Builder::new()
+        .prefix("openshell-vm-rootfs-")
+        .suffix(".tar")
+        .tempfile()
+        .into_diagnostic()
+        .wrap_err("failed to allocate temporary VM rootfs artifact")?;
+    let temp_path = temp.path().to_path_buf();
+    let (_file, output_path) = temp.keep().into_diagnostic().wrap_err_with(|| {
+        format!(
+            "failed to persist temporary VM rootfs artifact {}",
+            temp_path.display()
+        )
+    })?;
 
-    // 2. Push into the gateway.
+    on_log(format!(
+        "Exporting built image {image_ref} as VM rootfs artifact {}",
+        output_path.display()
+    ));
+    export_local_image_rootfs_to_path(image_ref, &output_path).await?;
+    on_log(format!(
+        "Exported VM rootfs artifact {}",
+        output_path.display()
+    ));
+    Ok(output_path)
+}
+
+/// Push a locally-built image into the gateway's containerd runtime.
+#[allow(clippy::implicit_hasher)]
+pub async fn push_image_into_gateway(
+    tag: &str,
+    gateway_name: &str,
+    on_log: &mut impl FnMut(String),
+) -> Result<()> {
     on_log(format!(
         "Pushing image {tag} into gateway \"{gateway_name}\""
     ));
 
-    // Use the long-timeout Docker client so `docker save` of multi-GB images
-    // doesn't trip the 120s bollard default mid-stream. Override with
-    // OPENSHELL_DOCKER_TIMEOUT_SECS=<seconds>.
     let local_docker = crate::docker::connect_local_for_large_transfers()
         .into_diagnostic()
         .wrap_err("failed to connect to local Docker daemon")?;
@@ -60,6 +123,28 @@ pub async fn build_and_push_image(
     Ok(())
 }
 
+/// Build a container image from a Dockerfile and push it into the gateway.
+///
+/// This is used by `openshell sandbox create --from <path>` when the
+/// active gateway is the local Kubernetes deployment. It:
+/// 1. Creates a tar archive of the build context directory.
+/// 2. Sends it to the local Docker daemon via `build_image()`.
+/// 3. Pushes the resulting image into the gateway's containerd via the
+///    existing `push_local_images()` pipeline.
+#[allow(clippy::implicit_hasher)]
+pub async fn build_and_push_image(
+    dockerfile_path: &Path,
+    tag: &str,
+    context_dir: &Path,
+    gateway_name: &str,
+    build_args: &HashMap<String, String>,
+    on_log: &mut impl FnMut(String),
+) -> Result<()> {
+    build_local_image(dockerfile_path, tag, context_dir, build_args, on_log).await?;
+    push_image_into_gateway(tag, gateway_name, on_log).await?;
+    Ok(())
+}
+
 /// Build a container image using the local Docker daemon.
/// /// Creates a tar archive of `context_dir`, sends it to Docker with the @@ -127,6 +212,79 @@ async fn build_image( Ok(()) } +async fn export_local_image_rootfs_to_path(image_ref: &str, tar_path: &Path) -> Result<()> { + let docker = Docker::connect_with_local_defaults() + .into_diagnostic() + .wrap_err("failed to connect to local Docker daemon")?; + let container_name = format!( + "openshell-rootfs-export-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() + ); + let create_options = CreateContainerOptionsBuilder::default() + .name(container_name.as_str()) + .build(); + let container = docker + .create_container( + Some(create_options), + ContainerCreateBody { + image: Some(image_ref.to_string()), + ..Default::default() + }, + ) + .await + .into_diagnostic() + .wrap_err_with(|| { + format!("failed to create temporary export container for image {image_ref}") + })?; + let container_id = container.id; + + let export_result = async { + if let Some(parent) = tar_path.parent() { + tokio::fs::create_dir_all(parent) + .await + .into_diagnostic() + .wrap_err_with(|| format!("failed to create {}", parent.display()))?; + } + let mut file = tokio::fs::File::create(tar_path) + .await + .into_diagnostic() + .wrap_err_with(|| format!("failed to create {}", tar_path.display()))?; + let mut stream = docker.export_container(&container_id); + while let Some(chunk) = stream.next().await { + let chunk = chunk + .into_diagnostic() + .wrap_err_with(|| format!("failed to export image {image_ref}"))?; + file.write_all(&chunk) + .await + .into_diagnostic() + .wrap_err_with(|| format!("failed to write {}", tar_path.display()))?; + } + file.flush() + .await + .into_diagnostic() + .wrap_err_with(|| format!("failed to flush {}", tar_path.display())) + } + .await; + + let cleanup_result = docker + .remove_container( + &container_id, + Some(RemoveContainerOptionsBuilder::default().force(true).build()), + ) + .await; + + match 
(export_result, cleanup_result) { + (Ok(()), Ok(())) => Ok(()), + (Err(err), _) => Err(err), + (Ok(()), Err(err)) => Err(err).into_diagnostic().wrap_err_with(|| { + format!("failed to remove temporary export container for {image_ref}") + }), + } +} + /// Create a tar archive of a directory for use as a Docker build context. /// /// Walks `context_dir` recursively, respects a `.dockerignore` file if present, @@ -468,4 +626,16 @@ mod tests { assert!(is_ignored("node_modules", true, &patterns)); assert!(is_ignored("node_modules/foo.js", false, &patterns)); } + + #[test] + fn encode_and_decode_rootfs_tar_image_ref_round_trip() { + let dir = tempfile::tempdir().unwrap(); + let tar_path = dir.path().join("rootfs tar.tar"); + fs::write(&tar_path, "rootfs").unwrap(); + + let encoded = encode_rootfs_tar_image_ref(&tar_path).unwrap(); + let decoded = decode_rootfs_tar_image_ref(&encoded).unwrap(); + + assert_eq!(decoded, tar_path.canonicalize().unwrap()); + } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 8f96124aa..cd651eaa8 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -2657,15 +2657,29 @@ fn image_requests_gpu(image: &str) -> bool { image_name.contains("gpu") } -/// Build a Dockerfile and push the resulting image into the gateway. +fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>) -> bool { + !metadata.is_some_and(|metadata| metadata.is_remote) +} + +/// Build a Dockerfile and make the resulting image available to the gateway. /// -/// Returns the image tag that was built so the caller can use it for sandbox -/// creation. +/// For local Kubernetes gateways running in Docker, this imports the built image +/// into the gateway runtime and returns the Docker tag. For local VM gateways, +/// this exports the built image as a rootfs tar artifact and returns an internal +/// pseudo-image URI understood by the VM driver. 
async fn build_from_dockerfile( dockerfile: &Path, context: &Path, gateway_name: &str, ) -> Result { + let metadata = get_gateway_metadata(gateway_name); + if !dockerfile_sources_supported_for_gateway(metadata.as_ref()) { + return Err(miette!( + "local Dockerfile sources are only supported for local gateways; gateway '{}' is remote", + gateway_name + )); + } + let timestamp = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() @@ -2685,25 +2699,48 @@ async fn build_from_dockerfile( eprintln!(" {msg}"); }; - openshell_bootstrap::build::build_and_push_image( + openshell_bootstrap::build::build_local_image( dockerfile, &tag, context, - gateway_name, &HashMap::new(), &mut on_log, ) .await?; + let existing_gateway = openshell_bootstrap::check_existing_deployment(gateway_name, None) + .await + .wrap_err("failed to inspect local gateway deployment state")?; + let pushed_into_gateway = existing_gateway + .is_some_and(|gateway| gateway.container_exists && gateway.container_running); + if pushed_into_gateway { + openshell_bootstrap::build::push_image_into_gateway(&tag, gateway_name, &mut on_log) + .await?; + eprintln!(); + eprintln!( + "{} Image {} is available in the gateway.", + "✓".green().bold(), + tag.cyan(), + ); + eprintln!(); + return Ok(tag); + } + + let rootfs_tar = openshell_bootstrap::build::export_local_image_rootfs(&tag, &mut on_log) + .await + .wrap_err("failed to export built image as a VM rootfs artifact")?; + let artifact_ref = openshell_bootstrap::build::encode_rootfs_tar_image_ref(&rootfs_tar)?; + eprintln!(); eprintln!( - "{} Image {} is available in the gateway.", + "{} VM rootfs artifact {} is ready for gateway '{}'.", "✓".green().bold(), - tag.cyan(), + rootfs_tar.display().to_string().cyan(), + gateway_name, ); eprintln!(); - Ok(tag) + Ok(artifact_ref) } /// Load sandbox policy YAML. 
@@ -5520,13 +5557,13 @@ fn format_timestamp_ms(ms: i64) -> String { #[cfg(test)] mod tests { use super::{ - GatewayControlTarget, TlsOptions, format_gateway_select_header, - format_gateway_select_items, gateway_add, gateway_auth_label, gateway_select_with, - gateway_type_label, git_sync_files, http_health_check, image_requests_gpu, - inferred_provider_type, parse_cli_setting_value, parse_credential_pairs, - plaintext_gateway_is_remote, provisioning_timeout_message, ready_false_condition_message, - resolve_gateway_control_target_from, sandbox_should_persist, shell_escape, - source_requests_gpu, validate_gateway_name, validate_ssh_host, + GatewayControlTarget, TlsOptions, dockerfile_sources_supported_for_gateway, + format_gateway_select_header, format_gateway_select_items, gateway_add, gateway_auth_label, + gateway_select_with, gateway_type_label, git_sync_files, http_health_check, + image_requests_gpu, inferred_provider_type, parse_cli_setting_value, + parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message, + ready_false_condition_message, resolve_gateway_control_target_from, sandbox_should_persist, + shell_escape, source_requests_gpu, validate_gateway_name, validate_ssh_host, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -5776,6 +5813,41 @@ mod tests { assert!(!source_requests_gpu("base")); } + #[test] + fn dockerfile_sources_are_rejected_for_remote_gateways() { + let metadata = GatewayMetadata { + name: "remote".to_string(), + gateway_endpoint: "https://gateway.example.com".to_string(), + is_remote: true, + gateway_port: 443, + remote_host: Some("user@gateway.example.com".to_string()), + resolved_host: Some("gateway.example.com".to_string()), + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + }; + + assert!(!dockerfile_sources_supported_for_gateway(Some(&metadata))); + } + + #[test] + fn dockerfile_sources_are_allowed_for_local_gateways() { + let metadata = GatewayMetadata { + name: "local".to_string(), + 
gateway_endpoint: "http://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + }; + + assert!(dockerfile_sources_supported_for_gateway(Some(&metadata))); + assert!(dockerfile_sources_supported_for_gateway(None)); + } + #[test] fn ready_false_condition_message_prefers_reason_and_message() { let status = SandboxStatus { diff --git a/crates/openshell-driver-vm/Cargo.toml b/crates/openshell-driver-vm/Cargo.toml index 04f4e9fc5..97c15d261 100644 --- a/crates/openshell-driver-vm/Cargo.toml +++ b/crates/openshell-driver-vm/Cargo.toml @@ -21,6 +21,7 @@ path = "src/main.rs" [dependencies] openshell-core = { path = "../openshell-core" } openshell-vfio = { path = "../openshell-vfio" } +openshell-bootstrap = { path = "../openshell-bootstrap" } tokio = { workspace = true } tonic = { workspace = true, features = ["transport"] } @@ -35,9 +36,12 @@ miette = { workspace = true } url = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +oci-client = "0.16" libc = "0.2" libloading = "0.8" tar = "0.4" +flate2 = "1" +sha2 = "0.10" zstd = "0.13" # smol-rs/polling drives the BSD/macOS parent-death detection in diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index 39be02676..def0ae4db 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -2,7 +2,7 @@ > Status: Experimental. The VM compute driver is under active development and the interface still has VM-specific plumbing that will be generalized. -Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) for OpenShell. The gateway spawns this binary as a subprocess, talks to it over a Unix domain socket with the `openshell.compute.v1.ComputeDriver` gRPC surface, and lets it manage per-sandbox microVMs. 
The runtime (libkrun + libkrunfw + gvproxy) and sandbox rootfs are embedded directly in the binary — no sibling files required at runtime. +Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) for OpenShell. The gateway spawns this binary as a subprocess, talks to it over a Unix domain socket with the `openshell.compute.v1.ComputeDriver` gRPC surface, and lets it manage per-sandbox microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox supervisor are embedded directly in the binary; each sandbox guest rootfs is derived from a configured container image at create time. ## How it fits together @@ -10,7 +10,7 @@ Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) fo flowchart LR subgraph host["Host process"] gateway["openshell-server
(compute::vm::spawn)"] - driver["openshell-driver-vm
├── libkrun (VM)
├── gvproxy (net)
└── rootfs.tar.zst"] + driver["openshell-driver-vm
├── libkrun (VM)
├── gvproxy (net)
└── openshell-sandbox.zst"] gateway <-->|"gRPC over UDS
compute-driver.sock"| driver end @@ -35,8 +35,9 @@ Sandbox guests execute `/opt/openshell/bin/openshell-sandbox` as PID 1 inside th mise run gateway:vm ``` -First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:rootfs -- --base` builds the embedded rootfs. Subsequent runs are cached. To keep the Unix socket path under macOS `SUN_LEN`, `mise run gateway:vm` and `start.sh` default the state dir to `/tmp/openshell-vm-driver-dev-$USER-port-$PORT/` (SQLite DB + per-sandbox rootfs + `compute-driver.sock`) unless `OPENSHELL_VM_DRIVER_STATE_DIR` is set. -The wrapper auto-registers the gateway with the CLI (`gateway destroy` + `gateway add`) so no manual registration step is needed. When running under `sudo`, it uses `sudo -u $SUDO_USER` for the registration so the config is written under the invoking user's home directory. Re-runs are idempotent. +First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:supervisor` builds the bundled guest supervisor. Subsequent runs are cached. To keep the Unix socket path under macOS `SUN_LEN`, `mise run gateway:vm` and `start.sh` default the state dir to `/tmp/openshell-vm-driver-dev-$USER-/` (SQLite DB + per-sandbox rootfs + `compute-driver.sock`) unless `OPENSHELL_VM_DRIVER_STATE_DIR` is set. +By default the wrapper names the gateway after the repo directory, writes `OPENSHELL_GATEWAY=` into `.env`, and writes plaintext local gateway metadata under `~/.config/openshell/gateways//metadata.json` so repo-local `scripts/bin/openshell status` and `sandbox create` resolve to the VM gateway without an extra `gateway select`. When running under `sudo`, the wrapper uses `sudo -u $SUDO_USER` for the registration so the config is written under the invoking user's home directory. Re-runs are idempotent. 
+If neither `OPENSHELL_SERVER_PORT` nor `GATEWAY_PORT` is set, the wrapper picks a random free local port once and appends `GATEWAY_PORT=` to `.env`. Later runs reuse that port through `mise`'s env loading. If you set `OPENSHELL_SERVER_PORT` explicitly, the wrapper uses it for that run and still fails fast on conflicts. It also exports `OPENSHELL_DRIVER_DIR=$PWD/target/debug` before starting the gateway so local dev runs use the freshly built `openshell-driver-vm` instead of an older installed copy from `~/.local/libexec/openshell` or `/usr/local/libexec`. For GPU passthrough (VFIO), pass `-- --gpu` and run with root privileges: @@ -55,25 +56,24 @@ OPENSHELL_SSH_HANDSHAKE_SECRET=$(openssl rand -hex 32) \ crates/openshell-driver-vm/start.sh ``` -Run multiple dev gateways side by side by giving each one a unique port. The wrapper derives a distinct default state dir from that port automatically: +If you want to pin the project port instead of using the `.env` default: ```shell -OPENSHELL_SERVER_PORT=8080 mise run gateway:vm -OPENSHELL_SERVER_PORT=8081 mise run gateway:vm +GATEWAY_PORT=28080 mise run gateway:vm ``` -If you want a custom suffix instead of `port-$PORT`, set `OPENSHELL_VM_INSTANCE`: +If you want a custom state-dir suffix instead of the repo-name default, set `OPENSHELL_VM_INSTANCE`: ```shell -OPENSHELL_SERVER_PORT=8082 \ +GATEWAY_PORT=28081 \ OPENSHELL_VM_INSTANCE=feature-a \ mise run gateway:vm ``` -If you want a custom CLI gateway name, set `OPENSHELL_VM_GATEWAY_NAME`: +If you want a custom CLI gateway name instead of the repo directory, set `OPENSHELL_VM_GATEWAY_NAME`: ```shell -OPENSHELL_SERVER_PORT=8082 \ +GATEWAY_PORT=28082 \ OPENSHELL_VM_GATEWAY_NAME=vm-feature-a \ mise run gateway:vm ``` @@ -81,7 +81,7 @@ mise run gateway:vm Teardown: ```shell -rm -rf /tmp/openshell-vm-driver-dev-$USER-port-8080 +rm -rf /tmp/openshell-vm-driver-dev-$USER-$(basename "$PWD" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g' | sed 's/--*/-/g' | sed 
's/^-//;s/-$//')
 ```
 
 ## Manual equivalent
@@ -89,9 +89,9 @@ rm -rf /tmp/openshell-vm-driver-dev-$USER-port-8080
 If you want to drive the launch yourself instead of using `start.sh`:
 
 ```shell
-# 1. Stage runtime artifacts + base rootfs into target/vm-runtime-compressed/
+# 1. Stage runtime artifacts + supervisor bundle into target/vm-runtime-compressed/
 mise run vm:setup
-mise run vm:rootfs -- --base # if rootfs.tar.zst is not already present
+mise run vm:supervisor # if openshell-sandbox.zst is not already present
 
 # 2. Build both binaries with the staged artifacts embedded
 OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=$PWD/target/vm-runtime-compressed \
@@ -109,6 +109,7 @@ target/debug/openshell-gateway \
   --disable-tls \
   --database-url sqlite:/tmp/openshell-vm-driver-dev-$USER-port-8080/openshell.db \
   --driver-dir $PWD/target/debug \
+  --sandbox-image <image> \
   --grpc-endpoint http://host.containers.internal:8080 \
   --ssh-handshake-secret dev-vm-driver-secret \
   --ssh-gateway-host 127.0.0.1 \
@@ -140,11 +141,12 @@ See [`openshell-gateway --help`](../openshell-server/src/cli.rs) for the full fl
 The gateway is auto-registered by `start.sh`. In another terminal:
 
 ```shell
-scripts/bin/openshell sandbox create --name demo
-scripts/bin/openshell sandbox connect demo
+./scripts/bin/openshell status
+./scripts/bin/openshell sandbox create --name demo --from <image>
+./scripts/bin/openshell sandbox connect demo
 ```
 
-First sandbox takes 10–30 seconds to boot (rootfs extraction + libkrun + guest init). Subsequent creates reuse the prepared sandbox rootfs.
+First sandbox takes 10–30 seconds to boot (image fetch/prepare/cache + libkrun + guest init). If `--from` is omitted, the VM driver uses the gateway's configured default sandbox image. Without either `--from` or `--sandbox-image`, VM sandbox creation fails. Subsequent creates reuse the prepared sandbox rootfs.
## Logs and debugging @@ -163,9 +165,9 @@ The VM guest's serial console is appended to `//console.l - Rust toolchain - Guest-supervisor cross-compile toolchain (needed on macOS, and on Linux when host arch ≠ guest arch): - Matching rustup target: `rustup target add aarch64-unknown-linux-gnu` (or `x86_64-unknown-linux-gnu` for an amd64 guest) - - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `build-rootfs.sh` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary. + - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `vm:supervisor` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary. - [mise](https://mise.jdx.dev/) task runner -- Docker (needed by `mise run vm:rootfs` to build the base rootfs) +- Docker-compatible socket on the CLI host when using `openshell sandbox create --from ./Dockerfile` or `--from ./dir` - `gh` CLI (used by `mise run vm:setup` to download pre-built runtime artifacts) ## Relationship to `openshell-vm` diff --git a/crates/openshell-driver-vm/build.rs b/crates/openshell-driver-vm/build.rs index e10a1dde0..36b3eb183 100644 --- a/crates/openshell-driver-vm/build.rs +++ b/crates/openshell-driver-vm/build.rs @@ -3,11 +3,11 @@ //! Build script for openshell-driver-vm. //! -//! This crate embeds the sandbox rootfs plus the minimal libkrun runtime -//! artifacts it needs to boot base VMs without depending on the openshell-vm -//! binary or crate. +//! This crate embeds the sandbox supervisor plus the minimal libkrun runtime +//! artifacts it needs to boot VMs without depending on the openshell-vm binary +//! or crate. 
-use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::{env, fs}; fn main() { @@ -21,8 +21,7 @@ fn main() { "libkrun.dylib.zst", "libkrunfw.5.dylib.zst", "gvproxy.zst", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ] { println!("cargo:rerun-if-changed={dir}/{name}"); } @@ -37,15 +36,7 @@ fn main() { "linux" => ("libkrun.so", "libkrunfw.so.5"), _ => { println!("cargo:warning=VM runtime not available for {target_os}-{target_arch}"); - generate_stub_resources( - &out_dir, - &[ - "libkrun", - "libkrunfw", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", - ], - ); + generate_stub_resources(&out_dir, &["libkrun", "libkrunfw", "openshell-sandbox.zst"]); return; } }; @@ -54,15 +45,14 @@ fn main() { PathBuf::from(dir) } else { println!("cargo:warning=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR not set"); - println!("cargo:warning=Run: mise run vm:setup"); + println!("cargo:warning=Run: mise run vm:setup && mise run vm:supervisor"); generate_stub_resources( &out_dir, &[ &format!("{libkrun_name}.zst"), &format!("{libkrunfw_name}.zst"), "gvproxy.zst", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ], ); return; @@ -73,15 +63,14 @@ fn main() { "cargo:warning=Compressed runtime dir not found: {}", compressed_dir.display() ); - println!("cargo:warning=Run: mise run vm:setup"); + println!("cargo:warning=Run: mise run vm:setup && mise run vm:supervisor"); generate_stub_resources( &out_dir, &[ &format!("{libkrun_name}.zst"), &format!("{libkrunfw_name}.zst"), "gvproxy.zst", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ], ); return; @@ -94,10 +83,9 @@ fn main() { format!("{libkrunfw_name}.zst"), ), ("gvproxy.zst".to_string(), "gvproxy.zst".to_string()), - ("rootfs.tar.zst".to_string(), "rootfs.tar.zst".to_string()), ( - "rootfs-gpu.tar.zst".to_string(), - "rootfs-gpu.tar.zst".to_string(), + "openshell-sandbox.zst".to_string(), + "openshell-sandbox.zst".to_string(), ), ]; @@ -126,26 +114,27 @@ fn main() { e ) }); 
- let size = fs::metadata(&dst_path).map_or(0, |m| m.len()); + let size = fs::metadata(&dst_path).map(|m| m.len()).unwrap_or(0); println!("cargo:warning=Embedded {src_name}: {size} bytes"); } if !all_found { - println!("cargo:warning=Some artifacts missing. Run: mise run vm:setup"); + println!( + "cargo:warning=Some artifacts missing. Run: mise run vm:setup && mise run vm:supervisor" + ); generate_stub_resources( &out_dir, &[ &format!("{libkrun_name}.zst"), &format!("{libkrunfw_name}.zst"), "gvproxy.zst", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ], ); } } -fn generate_stub_resources(out_dir: &Path, names: &[&str]) { +fn generate_stub_resources(out_dir: &PathBuf, names: &[&str]) { for name in names { let path = out_dir.join(name); if !path.exists() { diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 704f91610..7d4dbb9f9 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -5,12 +5,19 @@ use crate::gpu::{ GpuInventory, SubnetAllocator, allocate_vsock_cid, mac_from_sandbox_id, tap_device_name, }; use crate::rootfs::{ - extract_gpu_sandbox_rootfs_to, extract_sandbox_rootfs_to, sandbox_guest_init_path, + create_rootfs_archive_from_dir, extract_rootfs_archive_to, + prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path, }; +use flate2::read::GzDecoder; use futures::Stream; use nix::errno::Errno; use nix::sys::signal::{Signal, kill}; use nix::unistd::Pid; +use oci_client::client::{Client as OciClient, ClientConfig}; +use oci_client::manifest::{ImageIndexEntry, OciDescriptor}; +use oci_client::secrets::RegistryAuth; +use oci_client::{Reference, RegistryOperation}; +use openshell_bootstrap::build::decode_rootfs_tar_image_ref; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, @@ 
-22,14 +29,19 @@ use openshell_core::proto::compute::v1::{ compute_driver_server::ComputeDriver, watch_sandboxes_event, }; use openshell_vfio::SysfsRoot; +use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; +use std::fs; +use std::io::Read; use std::net::Ipv4Addr; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::process::Stdio; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; +use tokio::io::AsyncWriteExt; use tokio::process::{Child, Command}; use tokio::sync::{Mutex, broadcast, mpsc}; use tokio_stream::wrappers::ReceiverStream; @@ -47,6 +59,11 @@ const GUEST_TLS_DIR: &str = "/opt/openshell/tls"; const GUEST_TLS_CA_PATH: &str = "/opt/openshell/tls/ca.crt"; const GUEST_TLS_CERT_PATH: &str = "/opt/openshell/tls/tls.crt"; const GUEST_TLS_KEY_PATH: &str = "/opt/openshell/tls/tls.key"; +const IMAGE_CACHE_ROOT_DIR: &str = "images"; +const IMAGE_CACHE_ROOTFS_ARCHIVE: &str = "rootfs.tar"; +const IMAGE_IDENTITY_FILE: &str = "image-identity"; +const IMAGE_REFERENCE_FILE: &str = "image-reference"; +static IMAGE_CACHE_BUILD_COUNTER: AtomicU64 = AtomicU64::new(0); #[derive(Debug, Clone)] struct VmDriverTlsPaths { @@ -60,6 +77,7 @@ pub struct VmDriverConfig { pub openshell_endpoint: String, pub state_dir: PathBuf, pub launcher_bin: Option, + pub default_image: String, pub ssh_handshake_secret: String, pub ssh_handshake_skew_secs: u64, pub log_level: String, @@ -80,6 +98,7 @@ impl Default for VmDriverConfig { openshell_endpoint: String::new(), state_dir: PathBuf::from("target/openshell-vm-driver"), launcher_bin: None, + default_image: String::new(), ssh_handshake_secret: String::new(), ssh_handshake_skew_secs: 300, log_level: "info".to_string(), @@ -188,6 +207,7 @@ pub struct VmDriver { config: VmDriverConfig, launcher_bin: PathBuf, registry: Arc>>, + image_cache_lock: Arc>, events: broadcast::Sender, gpu_inventory: Option>>, subnet_allocator: Arc>, @@ -209,7 +229,7 @@ 
impl VmDriver {
                .map_err(|e| format!("cleanup stale TAP interfaces panicked: {e}"))?;
         }
 
-        let state_root = config.state_dir.join("sandboxes");
+        let state_root = sandboxes_root_dir(&config.state_dir);
         tokio::fs::create_dir_all(&state_root)
             .await
             .map_err(|err| {
@@ -218,6 +238,15 @@ impl VmDriver {
                 state_root.display()
             )
         })?;
+        let image_cache_root = image_cache_root_dir(&config.state_dir);
+        tokio::fs::create_dir_all(&image_cache_root)
+            .await
+            .map_err(|err| {
+                format!(
+                    "failed to create image cache dir '{}': {err}",
+                    image_cache_root.display()
+                )
+            })?;
 
         let launcher_bin = if let Some(path) = config.launcher_bin.clone() {
             path
@@ -248,6 +277,7 @@ impl VmDriver {
             config,
             launcher_bin,
             registry: Arc::new(Mutex::new(HashMap::new())),
+            image_cache_lock: Arc::new(Mutex::new(())),
             events,
             gpu_inventory,
             subnet_allocator,
@@ -264,7 +294,7 @@ impl VmDriver {
         GetCapabilitiesResponse {
             driver_name: DRIVER_NAME.to_string(),
             driver_version: openshell_core::VERSION.to_string(),
-            default_image: String::new(),
+            default_image: self.config.default_image.clone(),
             supports_gpu: self.gpu_inventory.is_some(),
             gpu_count,
         }
@@ -274,7 +304,13 @@ impl VmDriver {
     // gRPC API surface; boxing here would diverge from every other handler.
#[allow(clippy::result_large_err)] pub fn validate_sandbox(&self, sandbox: &Sandbox) -> Result<(), Status> { - validate_vm_sandbox(sandbox, self.config.gpu_enabled) + validate_vm_sandbox(sandbox, self.config.gpu_enabled)?; + if self.resolved_sandbox_image(sandbox).is_none() { + return Err(Status::failed_precondition( + "vm sandboxes require template.image or a configured default sandbox image", + )); + } + Ok(()) } // `tonic::Status` is large but is the standard error type across the @@ -293,6 +329,11 @@ impl VmDriver { let state_dir = sandbox_state_dir(&self.config.state_dir, &sandbox.id); let rootfs = state_dir.join("rootfs"); + let image_ref = self.resolved_sandbox_image(sandbox).ok_or_else(|| { + Status::failed_precondition( + "vm sandboxes require template.image or a configured default sandbox image", + ) + })?; tokio::fs::create_dir_all(&state_dir) .await @@ -302,22 +343,29 @@ impl VmDriver { .config .tls_paths() .map_err(Status::failed_precondition)?; - let rootfs_for_extract = rootfs.clone(); - let extract_fn = if is_gpu { - extract_gpu_sandbox_rootfs_to - } else { - extract_sandbox_rootfs_to + let image_identity = match self.prepare_runtime_rootfs(&image_ref, &rootfs).await { + Ok(image_identity) => image_identity, + Err(err) => { + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(err); + } }; - tokio::task::spawn_blocking(move || extract_fn(&rootfs_for_extract)) - .await - .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))? 
- .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?; if let Some(tls_paths) = tls_paths.as_ref() { - prepare_guest_tls_materials(&rootfs, tls_paths) - .await - .map_err(|err| { - Status::internal(format!("prepare guest TLS materials failed: {err}")) - })?; + if let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await { + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(Status::internal(format!( + "prepare guest TLS materials failed: {err}" + ))); + } + } + + if let Err(err) = + write_sandbox_image_metadata(&state_dir, &image_ref, &image_identity).await + { + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(Status::internal(format!( + "write sandbox image metadata failed: {err}" + ))); } let gpu_bdf = if is_gpu { @@ -587,6 +635,276 @@ impl VmDriver { } } + async fn prepare_runtime_rootfs( + &self, + image_ref: &str, + rootfs: &Path, + ) -> Result { + let image_identity = self.ensure_cached_image_rootfs_archive(image_ref).await?; + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + let rootfs_dest = rootfs.to_path_buf(); + tokio::task::spawn_blocking(move || extract_rootfs_archive_to(&archive_path, &rootfs_dest)) + .await + .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))? 
+ .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?; + + Ok(image_identity) + } + + fn resolved_sandbox_image(&self, sandbox: &Sandbox) -> Option { + requested_sandbox_image(sandbox) + .map(ToOwned::to_owned) + .or_else(|| { + let image = self.config.default_image.trim(); + (!image.is_empty()).then(|| image.to_string()) + }) + } + + async fn ensure_cached_image_rootfs_archive(&self, image_ref: &str) -> Result { + if let Some(rootfs_tar_path) = decode_rootfs_tar_image_ref(image_ref) { + return self + .ensure_cached_rootfs_tar_image_rootfs_archive(image_ref, &rootfs_tar_path) + .await; + } + + let reference = parse_registry_reference(image_ref)?; + let client = registry_client(); + let auth = registry_auth(image_ref)?; + client + .auth(&reference, &auth, RegistryOperation::Pull) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}" + )) + })?; + let image_identity = client + .fetch_manifest_digest(&reference, &auth) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to resolve vm sandbox image '{image_ref}': {err}" + )) + })?; + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + + if tokio::fs::metadata(&archive_path).await.is_ok() { + return Ok(image_identity); + } + + let _cache_guard = self.image_cache_lock.lock().await; + if tokio::fs::metadata(&archive_path).await.is_ok() { + return Ok(image_identity); + } + + self.build_cached_registry_image_rootfs_archive( + &client, + &reference, + &auth, + image_ref, + &image_identity, + ) + .await?; + Ok(image_identity) + } + + async fn ensure_cached_rootfs_tar_image_rootfs_archive( + &self, + image_ref: &str, + rootfs_tar_path: &Path, + ) -> Result { + let rootfs_tar = rootfs_tar_path.to_path_buf(); + let image_identity = tokio::task::spawn_blocking(move || compute_file_sha256(&rootfs_tar)) + .await + .map_err(|err| { + 
Status::internal(format!("rootfs tar digest computation panicked: {err}")) + })? + .map_err(|err| { + Status::failed_precondition(format!( + "failed to fingerprint vm sandbox rootfs artifact '{}': {err}", + rootfs_tar_path.display() + )) + })?; + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + + if tokio::fs::metadata(&archive_path).await.is_ok() { + return Ok(image_identity); + } + + let _cache_guard = self.image_cache_lock.lock().await; + if tokio::fs::metadata(&archive_path).await.is_ok() { + return Ok(image_identity); + } + + self.build_cached_rootfs_tar_image_rootfs_archive( + image_ref, + rootfs_tar_path, + &image_identity, + ) + .await?; + Ok(image_identity) + } + + async fn build_cached_rootfs_tar_image_rootfs_archive( + &self, + image_ref: &str, + rootfs_tar_path: &Path, + image_identity: &str, + ) -> Result<(), Status> { + let cache_dir = image_cache_dir(&self.config.state_dir, image_identity); + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, image_identity); + let staging_dir = image_cache_staging_dir(&self.config.state_dir, image_identity); + let prepared_rootfs = staging_dir.join("rootfs"); + let prepared_archive = staging_dir.join(IMAGE_CACHE_ROOTFS_ARCHIVE); + + tokio::fs::create_dir_all(image_cache_root_dir(&self.config.state_dir)) + .await + .map_err(|err| Status::internal(format!("create image cache dir failed: {err}")))?; + tokio::fs::create_dir_all(&cache_dir) + .await + .map_err(|err| Status::internal(format!("create image cache dir failed: {err}")))?; + + if tokio::fs::metadata(&staging_dir).await.is_ok() { + tokio::fs::remove_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!( + "remove stale image cache staging dir failed: {err}" + )) + })?; + } + tokio::fs::create_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!("create image cache staging dir failed: {err}")) + })?; + + let image_ref_owned = image_ref.to_string(); + let 
image_identity_owned = image_identity.to_string(); + let rootfs_tar_path_owned = rootfs_tar_path.to_path_buf(); + let prepared_rootfs_for_build = prepared_rootfs.clone(); + let prepared_archive_for_build = prepared_archive.clone(); + let build_result = tokio::task::spawn_blocking(move || { + extract_rootfs_archive_to(&rootfs_tar_path_owned, &prepared_rootfs_for_build)?; + prepare_sandbox_rootfs_from_image_root( + &prepared_rootfs_for_build, + &image_identity_owned, + ) + .map_err(|err| { + format!( + "vm sandbox image '{}' is not base-compatible: {err}", + image_ref_owned + ) + })?; + create_rootfs_archive_from_dir(&prepared_rootfs_for_build, &prepared_archive_for_build) + }) + .await + .map_err(|err| Status::internal(format!("rootfs artifact preparation panicked: {err}")))?; + + if let Err(err) = build_result { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(Status::failed_precondition(err)); + } + + if tokio::fs::metadata(&archive_path).await.is_ok() { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Ok(()); + } + + tokio::fs::rename(&prepared_archive, &archive_path) + .await + .map_err(|err| Status::internal(format!("store cached image rootfs failed: {err}")))?; + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + Ok(()) + } + + async fn build_cached_registry_image_rootfs_archive( + &self, + client: &OciClient, + reference: &Reference, + auth: &RegistryAuth, + image_ref: &str, + image_identity: &str, + ) -> Result<(), Status> { + let cache_dir = image_cache_dir(&self.config.state_dir, image_identity); + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, image_identity); + let staging_dir = image_cache_staging_dir(&self.config.state_dir, image_identity); + let prepared_rootfs = staging_dir.join("rootfs"); + let prepared_archive = staging_dir.join(IMAGE_CACHE_ROOTFS_ARCHIVE); + + tokio::fs::create_dir_all(image_cache_root_dir(&self.config.state_dir)) + .await + .map_err(|err| 
Status::internal(format!("create image cache dir failed: {err}")))?; + tokio::fs::create_dir_all(&cache_dir) + .await + .map_err(|err| Status::internal(format!("create image cache dir failed: {err}")))?; + + if tokio::fs::metadata(&staging_dir).await.is_ok() { + tokio::fs::remove_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!( + "remove stale image cache staging dir failed: {err}" + )) + })?; + } + tokio::fs::create_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!("create image cache staging dir failed: {err}")) + })?; + + if let Err(err) = pull_registry_image_rootfs( + client, + reference, + auth, + image_ref, + &staging_dir, + &prepared_rootfs, + ) + .await + { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(err); + } + + let image_ref_owned = image_ref.to_string(); + let image_identity_owned = image_identity.to_string(); + let prepared_rootfs_for_build = prepared_rootfs.clone(); + let prepared_archive_for_build = prepared_archive.clone(); + let build_result = tokio::task::spawn_blocking(move || { + prepare_sandbox_rootfs_from_image_root( + &prepared_rootfs_for_build, + &image_identity_owned, + ) + .map_err(|err| { + format!( + "vm sandbox image '{}' is not base-compatible: {err}", + image_ref_owned + ) + })?; + create_rootfs_archive_from_dir(&prepared_rootfs_for_build, &prepared_archive_for_build) + }) + .await + .map_err(|err| Status::internal(format!("image rootfs preparation panicked: {err}")))?; + + if let Err(err) = build_result { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(Status::failed_precondition(err)); + } + + if tokio::fs::metadata(&archive_path).await.is_ok() { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Ok(()); + } + + tokio::fs::rename(&prepared_archive, &archive_path) + .await + .map_err(|err| Status::internal(format!("store cached image rootfs failed: {err}")))?; + let _ = tokio::fs::remove_dir_all(&staging_dir).await; 
+ Ok(()) + } + /// Watch the launcher child process and surface errors as driver /// conditions. /// @@ -891,11 +1209,6 @@ fn validate_vm_sandbox(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Statu } if let Some(template) = spec.template.as_ref() { - if !template.image.is_empty() { - return Err(Status::failed_precondition( - "vm sandboxes do not support template.image", - )); - } if !template.agent_socket_path.is_empty() { return Err(Status::failed_precondition( "vm sandboxes do not support template.agent_socket_path", @@ -915,6 +1228,426 @@ fn validate_vm_sandbox(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Statu Ok(()) } +fn parse_registry_reference(image_ref: &str) -> Result { + Reference::try_from(image_ref).map_err(|err| { + Status::failed_precondition(format!( + "invalid vm sandbox image reference '{image_ref}': {err}" + )) + }) +} + +fn registry_client() -> OciClient { + OciClient::new(ClientConfig { + platform_resolver: Some(Box::new(linux_platform_resolver)), + ..Default::default() + }) +} + +fn linux_platform_resolver(manifests: &[ImageIndexEntry]) -> Option { + let expected_arch = linux_oci_arch(); + manifests + .iter() + .find_map(|entry| { + let platform = entry.platform.as_ref()?; + (platform.os.to_string() == "linux" + && platform.architecture.to_string() == expected_arch) + .then(|| entry.digest.clone()) + }) + .or_else(|| { + manifests.iter().find_map(|entry| { + let platform = entry.platform.as_ref()?; + (platform.os.to_string() == "linux").then(|| entry.digest.clone()) + }) + }) +} + +fn linux_oci_arch() -> &'static str { + match std::env::consts::ARCH { + "x86_64" => "amd64", + "aarch64" => "arm64", + "arm" => "arm", + other => other, + } +} + +fn registry_auth(image_ref: &str) -> Result { + let username = env_non_empty("OPENSHELL_REGISTRY_USERNAME"); + let token = env_non_empty("OPENSHELL_REGISTRY_TOKEN"); + + match token { + Some(token) => { + let username = match username { + Some(username) => username, + None if 
image_reference_registry_host(image_ref) + .eq_ignore_ascii_case("ghcr.io") => + { + "__token__".to_string() + } + None => { + return Err(Status::failed_precondition( + "OPENSHELL_REGISTRY_USERNAME is required when OPENSHELL_REGISTRY_TOKEN is set for non-GHCR registries", + )); + } + }; + Ok(RegistryAuth::Basic(username, token)) + } + None => Ok(RegistryAuth::Anonymous), + } +} + +fn env_non_empty(key: &str) -> Option { + std::env::var(key) + .ok() + .filter(|value| !value.trim().is_empty()) +} + +fn image_reference_registry_host(image_ref: &str) -> &str { + let first = image_ref.split('/').next().unwrap_or(image_ref); + if first.contains('.') || first.contains(':') || first.eq_ignore_ascii_case("localhost") { + first + } else { + "docker.io" + } +} + +async fn pull_registry_image_rootfs( + client: &OciClient, + reference: &Reference, + auth: &RegistryAuth, + image_ref: &str, + staging_dir: &Path, + rootfs: &Path, +) -> Result<(), Status> { + client + .auth(reference, auth, RegistryOperation::Pull) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}" + )) + })?; + let (manifest, _) = client + .pull_image_manifest(reference, auth) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to pull vm sandbox image manifest '{image_ref}': {err}" + )) + })?; + + tokio::fs::create_dir_all(rootfs) + .await + .map_err(|err| Status::internal(format!("create rootfs dir failed: {err}")))?; + tokio::fs::create_dir_all(staging_dir.join("layers")) + .await + .map_err(|err| Status::internal(format!("create layer staging dir failed: {err}")))?; + + for (index, layer) in manifest.layers.iter().enumerate() { + pull_registry_layer( + client, + reference, + image_ref, + staging_dir, + rootfs, + layer, + index, + ) + .await?; + } + + Ok(()) +} + +async fn pull_registry_layer( + client: &OciClient, + reference: &Reference, + image_ref: &str, + staging_dir: &Path, + 
rootfs: &Path, + layer: &OciDescriptor, + index: usize, +) -> Result<(), Status> { + let digest_component = sanitize_image_identity(&layer.digest); + let blob_path = staging_dir + .join("layers") + .join(format!("{index:02}-{digest_component}.blob")); + let layer_root = staging_dir + .join("layers") + .join(format!("{index:02}-{digest_component}.root")); + + let mut file = tokio::fs::File::create(&blob_path) + .await + .map_err(|err| Status::internal(format!("create layer blob failed: {err}")))?; + client + .pull_blob(reference, layer, &mut file) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to download layer '{}' for vm sandbox image '{image_ref}': {err}", + layer.digest + )) + })?; + file.flush() + .await + .map_err(|err| Status::internal(format!("flush layer blob failed: {err}")))?; + + let blob_path_for_digest = blob_path.clone(); + let expected_digest = layer.digest.clone(); + tokio::task::spawn_blocking(move || { + verify_descriptor_digest(&blob_path_for_digest, &expected_digest) + }) + .await + .map_err(|err| Status::internal(format!("layer digest verification panicked: {err}")))? + .map_err(|err| { + Status::failed_precondition(format!( + "vm sandbox image layer verification failed for '{}': {err}", + layer.digest + )) + })?; + + let blob_path_for_unpack = blob_path.clone(); + let layer_root_for_unpack = layer_root.clone(); + let rootfs_for_unpack = rootfs.to_path_buf(); + let media_type = layer.media_type.clone(); + tokio::task::spawn_blocking(move || { + extract_layer_blob_to_dir(&blob_path_for_unpack, &media_type, &layer_root_for_unpack)?; + apply_layer_dir_to_rootfs(&layer_root_for_unpack, &rootfs_for_unpack) + }) + .await + .map_err(|err| Status::internal(format!("layer extraction panicked: {err}")))? 
+ .map_err(|err| { + Status::failed_precondition(format!( + "failed to apply layer '{}' for vm sandbox image '{image_ref}': {err}", + layer.digest + )) + }) +} + +fn verify_descriptor_digest(path: &Path, expected_digest: &str) -> Result<(), String> { + let expected = expected_digest + .strip_prefix("sha256:") + .ok_or_else(|| format!("unsupported layer digest '{expected_digest}'"))?; + let actual = compute_file_sha256_hex(path)?; + if actual == expected { + Ok(()) + } else { + Err(format!( + "digest mismatch for {}: expected sha256:{expected}, got sha256:{actual}", + path.display() + )) + } +} + +fn compute_file_sha256(path: &Path) -> Result { + compute_file_sha256_hex(path).map(|digest| format!("sha256:{digest}")) +} + +fn compute_file_sha256_hex(path: &Path) -> Result { + let mut file = fs::File::open(path).map_err(|err| format!("open {}: {err}", path.display()))?; + let mut hasher = Sha256::new(); + let mut buffer = [0_u8; 64 * 1024]; + loop { + let read = file + .read(&mut buffer) + .map_err(|err| format!("read {}: {err}", path.display()))?; + if read == 0 { + break; + } + hasher.update(&buffer[..read]); + } + Ok(format!("{:x}", hasher.finalize())) +} + +fn extract_layer_blob_to_dir( + blob_path: &Path, + media_type: &str, + dest: &Path, +) -> Result<(), String> { + if dest.exists() { + fs::remove_dir_all(dest).map_err(|err| format!("remove {}: {err}", dest.display()))?; + } + fs::create_dir_all(dest).map_err(|err| format!("create {}: {err}", dest.display()))?; + + let file = + fs::File::open(blob_path).map_err(|err| format!("open {}: {err}", blob_path.display()))?; + match layer_compression_from_media_type(media_type)? 
{ + LayerCompression::None => extract_tar_reader_to_dir(file, dest), + LayerCompression::Gzip => extract_tar_reader_to_dir(GzDecoder::new(file), dest), + LayerCompression::Zstd => { + let decoder = zstd::stream::read::Decoder::new(file) + .map_err(|err| format!("decompress {}: {err}", blob_path.display()))?; + extract_tar_reader_to_dir(decoder, dest) + } + } +} + +fn extract_tar_reader_to_dir(reader: impl Read, dest: &Path) -> Result<(), String> { + let mut archive = tar::Archive::new(reader); + archive + .unpack(dest) + .map_err(|err| format!("extract layer into {}: {err}", dest.display())) +} + +fn layer_compression_from_media_type(media_type: &str) -> Result { + if media_type.is_empty() { + return Err("layer media type is missing".to_string()); + } + if media_type.ends_with("+zstd") { + return Ok(LayerCompression::Zstd); + } + if media_type.ends_with("+gzip") || media_type.ends_with(".gzip") { + return Ok(LayerCompression::Gzip); + } + if media_type.ends_with(".tar") + || media_type.ends_with("tar") + || media_type == "application/vnd.oci.image.layer.v1.tar" + || media_type == "application/vnd.oci.image.layer.nondistributable.v1.tar" + { + return Ok(LayerCompression::None); + } + Err(format!("unsupported layer media type '{media_type}'")) +} + +fn apply_layer_dir_to_rootfs(layer_root: &Path, rootfs: &Path) -> Result<(), String> { + merge_layer_directory(layer_root, rootfs) +} + +fn merge_layer_directory(source_dir: &Path, target_dir: &Path) -> Result<(), String> { + fs::create_dir_all(target_dir) + .map_err(|err| format!("create {}: {err}", target_dir.display()))?; + + let mut entries = fs::read_dir(source_dir) + .map_err(|err| format!("read {}: {err}", source_dir.display()))? 
+ .collect::, _>>() + .map_err(|err| format!("read {}: {err}", source_dir.display()))?; + entries.sort_by_key(|entry| entry.file_name()); + + if entries + .iter() + .any(|entry| entry.file_name().to_string_lossy() == ".wh..wh..opq") + { + clear_directory_contents(target_dir)?; + } + + for entry in entries { + let file_name = entry.file_name(); + let name = file_name.to_string_lossy(); + if name == ".wh..wh..opq" { + continue; + } + if let Some(hidden_name) = name.strip_prefix(".wh.") { + remove_path_if_exists(&target_dir.join(hidden_name))?; + continue; + } + + let source_path = entry.path(); + let dest_path = target_dir.join(&file_name); + let metadata = fs::symlink_metadata(&source_path) + .map_err(|err| format!("stat {}: {err}", source_path.display()))?; + let file_type = metadata.file_type(); + + if file_type.is_dir() { + if dest_path.exists() + && !fs::symlink_metadata(&dest_path) + .map_err(|err| format!("stat {}: {err}", dest_path.display()))? + .file_type() + .is_dir() + { + remove_path_if_exists(&dest_path)?; + } + fs::create_dir_all(&dest_path) + .map_err(|err| format!("create {}: {err}", dest_path.display()))?; + merge_layer_directory(&source_path, &dest_path)?; + fs::set_permissions(&dest_path, metadata.permissions()) + .map_err(|err| format!("chmod {}: {err}", dest_path.display()))?; + } else if file_type.is_file() { + remove_path_if_exists(&dest_path)?; + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent) + .map_err(|err| format!("create {}: {err}", parent.display()))?; + } + fs::copy(&source_path, &dest_path).map_err(|err| { + format!( + "copy {} to {}: {err}", + source_path.display(), + dest_path.display() + ) + })?; + fs::set_permissions(&dest_path, metadata.permissions()) + .map_err(|err| format!("chmod {}: {err}", dest_path.display()))?; + } else if file_type.is_symlink() { + copy_symlink(&source_path, &dest_path)?; + } else { + return Err(format!( + "unsupported layer entry type at {}", + source_path.display() + )); + } + } 
+ + Ok(()) +} + +fn clear_directory_contents(dir: &Path) -> Result<(), String> { + if !dir.exists() { + return Ok(()); + } + for entry in fs::read_dir(dir).map_err(|err| format!("read {}: {err}", dir.display()))? { + let entry = entry.map_err(|err| format!("read {}: {err}", dir.display()))?; + remove_path_if_exists(&entry.path())?; + } + Ok(()) +} + +fn remove_path_if_exists(path: &Path) -> Result<(), String> { + let Ok(metadata) = fs::symlink_metadata(path) else { + return Ok(()); + }; + if metadata.file_type().is_dir() { + fs::remove_dir_all(path).map_err(|err| format!("remove {}: {err}", path.display())) + } else { + fs::remove_file(path).map_err(|err| format!("remove {}: {err}", path.display())) + } +} + +#[cfg(unix)] +fn copy_symlink(source_path: &Path, dest_path: &Path) -> Result<(), String> { + let target = fs::read_link(source_path) + .map_err(|err| format!("readlink {}: {err}", source_path.display()))?; + remove_path_if_exists(dest_path)?; + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?; + } + std::os::unix::fs::symlink(&target, dest_path).map_err(|err| { + format!( + "symlink {} to {}: {err}", + target.display(), + dest_path.display() + ) + }) +} + +#[cfg(not(unix))] +fn copy_symlink(_source_path: &Path, _dest_path: &Path) -> Result<(), String> { + Err("symlink layers are only supported on Unix hosts".to_string()) +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum LayerCompression { + None, + Gzip, + Zstd, +} + +fn requested_sandbox_image(sandbox: &Sandbox) -> Option<&str> { + sandbox + .spec + .as_ref() + .and_then(|spec| spec.template.as_ref()) + .map(|template| template.image.trim()) + .filter(|image| !image.is_empty()) +} + fn merged_environment(sandbox: &Sandbox) -> HashMap { let mut environment = sandbox .spec @@ -1033,8 +1766,69 @@ fn sandbox_log_level(sandbox: &Sandbox, default_level: &str) -> String { .to_string() } +fn sandboxes_root_dir(root: &Path) -> 
PathBuf { + root.join("sandboxes") +} + fn sandbox_state_dir(root: &Path, sandbox_id: &str) -> PathBuf { - root.join("sandboxes").join(sandbox_id) + sandboxes_root_dir(root).join(sandbox_id) +} + +fn image_cache_root_dir(root: &Path) -> PathBuf { + root.join(IMAGE_CACHE_ROOT_DIR) +} + +fn image_cache_dir(root: &Path, image_identity: &str) -> PathBuf { + image_cache_root_dir(root).join(sanitize_image_identity(image_identity)) +} + +fn image_cache_rootfs_archive(root: &Path, image_identity: &str) -> PathBuf { + image_cache_dir(root, image_identity).join(IMAGE_CACHE_ROOTFS_ARCHIVE) +} + +fn image_cache_staging_dir(root: &Path, image_identity: &str) -> PathBuf { + image_cache_root_dir(root).join(format!( + "{}.staging-{}", + sanitize_image_identity(image_identity), + unique_image_cache_suffix() + )) +} + +fn sanitize_image_identity(image_identity: &str) -> String { + image_identity + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || ch == '.' { + ch + } else { + '-' + } + }) + .collect() +} + +fn unique_image_cache_suffix() -> String { + let counter = IMAGE_CACHE_BUILD_COUNTER.fetch_add(1, Ordering::Relaxed); + format!("{}-{counter}", current_time_ms()) +} + +async fn write_sandbox_image_metadata( + state_dir: &Path, + image_ref: &str, + image_identity: &str, +) -> Result<(), std::io::Error> { + tokio::fs::write( + state_dir.join(IMAGE_IDENTITY_FILE), + format!("{image_identity}\n"), + ) + .await?; + tokio::fs::write( + state_dir.join(IMAGE_REFERENCE_FILE), + format!("{image_ref}\n"), + ) + .await?; + + Ok(()) } async fn prepare_guest_tls_materials( @@ -1056,7 +1850,7 @@ async fn copy_guest_tls_material( mode: u32, ) -> Result<(), std::io::Error> { tokio::fs::copy(source, dest).await?; - tokio::fs::set_permissions(dest, std::fs::Permissions::from_mode(mode)).await?; + tokio::fs::set_permissions(dest, fs::Permissions::from_mode(mode)).await?; Ok(()) } @@ -1169,6 +1963,7 @@ mod tests { DriverSandboxSpec as SandboxSpec, 
DriverSandboxTemplate as SandboxTemplate,
     };
     use prost_types::{Struct, Value, value::Kind};
+    use std::fs;
     use std::sync::atomic::{AtomicU64, Ordering};
     use std::time::{SystemTime, UNIX_EPOCH};
     use tonic::Code;
@@ -1242,6 +2037,112 @@ mod tests {
         assert!(err.message().contains("platform_config"));
     }
 
+    #[test]
+    fn validate_vm_sandbox_accepts_template_image() {
+        let sandbox = Sandbox {
+            spec: Some(SandboxSpec {
+                template: Some(SandboxTemplate {
+                    image: "ghcr.io/example/sandbox:latest".to_string(),
+                    ..Default::default()
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        validate_vm_sandbox(&sandbox, false).expect("template.image should be accepted");
+    }
+
+    #[test]
+    fn capabilities_report_configured_default_image() {
+        let driver = VmDriver {
+            config: VmDriverConfig {
+                default_image: "openshell/sandbox:dev".to_string(),
+                ..Default::default()
+            },
+            launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"),
+            registry: Arc::new(Mutex::new(HashMap::new())),
+            image_cache_lock: Arc::new(Mutex::new(())),
+            events: broadcast::channel(WATCH_BUFFER).0,
+        };
+
+        assert_eq!(driver.capabilities().default_image, "openshell/sandbox:dev");
+    }
+
+    #[test]
+    fn resolved_sandbox_image_prefers_template_image() {
+        let driver = VmDriver {
+            config: VmDriverConfig {
+                default_image: "openshell/sandbox:default".to_string(),
+                ..Default::default()
+            },
+            launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"),
+            registry: Arc::new(Mutex::new(HashMap::new())),
+            image_cache_lock: Arc::new(Mutex::new(())),
+            events: broadcast::channel(WATCH_BUFFER).0,
+        };
+        let sandbox = Sandbox {
+            spec: Some(SandboxSpec {
+                template: Some(SandboxTemplate {
+                    image: "ghcr.io/example/custom:latest".to_string(),
+                    ..Default::default()
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+
+        assert_eq!(
+            driver.resolved_sandbox_image(&sandbox).as_deref(),
+            Some("ghcr.io/example/custom:latest")
+        );
+    }
+
+    #[test]
+    fn resolved_sandbox_image_falls_back_to_driver_default() {
+        let 
driver = VmDriver { + config: VmDriverConfig { + default_image: "openshell/sandbox:default".to_string(), + ..Default::default() + }, + launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"), + registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = Sandbox { + spec: Some(SandboxSpec { + template: Some(SandboxTemplate::default()), + ..Default::default() + }), + ..Default::default() + }; + + assert_eq!( + driver.resolved_sandbox_image(&sandbox).as_deref(), + Some("openshell/sandbox:default") + ); + } + + #[test] + fn resolved_sandbox_image_returns_none_without_template_or_default() { + let driver = VmDriver { + config: VmDriverConfig::default(), + launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"), + registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = Sandbox { + spec: Some(SandboxSpec { + template: Some(SandboxTemplate::default()), + ..Default::default() + }), + ..Default::default() + }; + + assert!(driver.resolved_sandbox_image(&sandbox).is_none()); + } + #[test] fn merged_environment_prefers_spec_values() { let sandbox = Sandbox { @@ -1359,6 +2260,64 @@ mod tests { ); } + #[test] + fn image_reference_registry_host_defaults_to_docker_hub() { + assert_eq!(image_reference_registry_host("ubuntu:24.04"), "docker.io"); + assert_eq!( + image_reference_registry_host("ghcr.io/nvidia/openshell/base:latest"), + "ghcr.io" + ); + assert_eq!( + image_reference_registry_host("localhost:5000/example/sandbox:dev"), + "localhost:5000" + ); + } + + #[test] + fn apply_layer_dir_to_rootfs_honors_whiteouts() { + let base = unique_temp_dir(); + let rootfs = base.join("rootfs"); + let layer = base.join("layer"); + + fs::create_dir_all(rootfs.join("dir")).unwrap(); + fs::write(rootfs.join("removed.txt"), "old").unwrap(); + fs::write(rootfs.join("dir/old.txt"), 
"old").unwrap(); + + fs::create_dir_all(layer.join("dir")).unwrap(); + fs::write(layer.join(".wh.removed.txt"), "").unwrap(); + fs::write(layer.join("dir/.wh..wh..opq"), "").unwrap(); + fs::write(layer.join("dir/new.txt"), "new").unwrap(); + + apply_layer_dir_to_rootfs(&layer, &rootfs).unwrap(); + + assert!(!rootfs.join("removed.txt").exists()); + assert!(!rootfs.join("dir/old.txt").exists()); + assert_eq!( + fs::read_to_string(rootfs.join("dir/new.txt")).unwrap(), + "new" + ); + + let _ = fs::remove_dir_all(base); + } + + #[test] + fn layer_compression_from_media_type_supports_common_formats() { + assert_eq!( + layer_compression_from_media_type("application/vnd.oci.image.layer.v1.tar").unwrap(), + LayerCompression::None + ); + assert_eq!( + layer_compression_from_media_type("application/vnd.oci.image.layer.v1.tar+gzip") + .unwrap(), + LayerCompression::Gzip + ); + assert_eq!( + layer_compression_from_media_type("application/vnd.oci.image.layer.v1.tar+zstd") + .unwrap(), + LayerCompression::Zstd + ); + } + #[test] fn build_guest_environment_includes_tls_paths_for_https_endpoint() { let config = VmDriverConfig { @@ -1401,6 +2360,7 @@ mod tests { config: VmDriverConfig::default(), launcher_bin: PathBuf::from("openshell-driver-vm"), registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), events, gpu_inventory: None, subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( @@ -1480,6 +2440,29 @@ mod tests { .expect("dns endpoint should be accepted"); } + #[test] + fn compute_file_sha256_returns_prefixed_digest() { + let base = unique_temp_dir(); + fs::create_dir_all(&base).unwrap(); + let file = base.join("rootfs.tar"); + fs::write(&file, b"openshell").unwrap(); + + assert_eq!( + compute_file_sha256(&file).unwrap(), + "sha256:dc5cbc21a452a783ec453e8a8603101dfec5c7d6a19b6c645889bec8b97c2390" + ); + + let _ = fs::remove_dir_all(base); + } + + #[test] + fn sanitize_image_identity_rewrites_path_separators() { + 
assert_eq!( + sanitize_image_identity("sha256:abc/def@ghi"), + "sha256-abc-def-ghi" + ); + } + #[tokio::test] async fn prepare_guest_tls_materials_copies_bundle_into_rootfs() { let base = unique_temp_dir(); diff --git a/crates/openshell-driver-vm/src/main.rs b/crates/openshell-driver-vm/src/main.rs index ca1842596..596e6c88d 100644 --- a/crates/openshell-driver-vm/src/main.rs +++ b/crates/openshell-driver-vm/src/main.rs @@ -62,6 +62,9 @@ struct Args { #[arg(long, env = "OPENSHELL_GRPC_ENDPOINT")] openshell_endpoint: Option, + #[arg(long, env = "OPENSHELL_SANDBOX_IMAGE", default_value = "")] + default_image: String, + #[arg( long, env = "OPENSHELL_VM_DRIVER_STATE_DIR", @@ -169,6 +172,7 @@ async fn main() -> Result<()> { .ok_or_else(|| miette::miette!("OPENSHELL_GRPC_ENDPOINT is required"))?, state_dir: args.state_dir, launcher_bin: None, + default_image: args.default_image, ssh_handshake_secret: args.ssh_handshake_secret.unwrap_or_default(), ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, log_level: args.log_level, diff --git a/crates/openshell-driver-vm/src/rootfs.rs b/crates/openshell-driver-vm/src/rootfs.rs index 7c7fa19f4..929641945 100644 --- a/crates/openshell-driver-vm/src/rootfs.rs +++ b/crates/openshell-driver-vm/src/rootfs.rs @@ -2,74 +2,138 @@ // SPDX-License-Identifier: Apache-2.0 use std::fs; -use std::io::Cursor; +use std::fs::File; +use std::io::{BufWriter, Cursor}; use std::path::Path; -const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); -const ROOTFS_GPU: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs-gpu.tar.zst")); +const SUPERVISOR: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/openshell-sandbox.zst")); const ROOTFS_VARIANT_MARKER: &str = ".openshell-rootfs-variant"; const SANDBOX_GUEST_INIT_PATH: &str = "/srv/openshell-vm-sandbox-init.sh"; +const SANDBOX_SUPERVISOR_PATH: &str = "/opt/openshell/bin/openshell-sandbox"; pub const fn sandbox_guest_init_path() -> &'static str { 
SANDBOX_GUEST_INIT_PATH } -pub fn extract_sandbox_rootfs_to(dest: &Path) -> Result<(), String> { - extract_variant( - ROOTFS, - "sandbox", - "sandbox rootfs not embedded. Build openshell-driver-vm with OPENSHELL_VM_RUNTIME_COMPRESSED_DIR set or run `mise run vm:setup` first", - dest, - ) -} - -pub fn extract_gpu_sandbox_rootfs_to(dest: &Path) -> Result<(), String> { - extract_variant( - ROOTFS_GPU, - "sandbox-gpu", - "GPU sandbox rootfs not embedded. Build with `mise run vm:rootfs -- --gpu` first", - dest, +pub fn prepare_sandbox_rootfs_from_image_root( + rootfs: &Path, + image_identity: &str, +) -> Result<(), String> { + prepare_sandbox_rootfs(rootfs)?; + validate_sandbox_rootfs(rootfs)?; + fs::write( + rootfs.join(ROOTFS_VARIANT_MARKER), + format!("{}:image:{image_identity}\n", env!("CARGO_PKG_VERSION")), ) + .map_err(|e| format!("write rootfs variant marker: {e}"))?; + Ok(()) } -fn extract_variant(blob: &[u8], variant: &str, empty_msg: &str, dest: &Path) -> Result<(), String> { - if blob.is_empty() { - return Err(empty_msg.to_string()); - } - - let expected_marker = format!("{}:{variant}", env!("CARGO_PKG_VERSION")); - let marker_path = dest.join(ROOTFS_VARIANT_MARKER); - - if dest.is_dir() - && fs::read_to_string(&marker_path).is_ok_and(|value| value.trim() == expected_marker) - { - return Ok(()); - } - +pub fn extract_rootfs_archive_to(archive_path: &Path, dest: &Path) -> Result<(), String> { if dest.exists() { fs::remove_dir_all(dest) .map_err(|e| format!("remove old rootfs {}: {e}", dest.display()))?; } - unpack_zstd_tar(blob, variant, dest)?; - prepare_sandbox_rootfs(dest)?; - fs::write(marker_path, format!("{expected_marker}\n")) - .map_err(|e| format!("write rootfs variant marker: {e}"))?; - Ok(()) + fs::create_dir_all(dest).map_err(|e| format!("create rootfs dir {}: {e}", dest.display()))?; + let file = + File::open(archive_path).map_err(|e| format!("open {}: {e}", archive_path.display()))?; + let mut archive = tar::Archive::new(file); + archive + 
.unpack(dest) + .map_err(|e| format!("extract rootfs tarball into {}: {e}", dest.display())) } -fn unpack_zstd_tar(blob: &[u8], label: &str, dest: &Path) -> Result<(), String> { - fs::create_dir_all(dest).map_err(|e| format!("create rootfs dir {}: {e}", dest.display()))?; +pub fn create_rootfs_archive_from_dir(source: &Path, archive_path: &Path) -> Result<(), String> { + if let Some(parent) = archive_path.parent() { + fs::create_dir_all(parent).map_err(|e| format!("create {}: {e}", parent.display()))?; + } - let decoder = zstd::Decoder::new(Cursor::new(blob)) - .map_err(|e| format!("decompress {label} rootfs: {e}"))?; - let mut archive = tar::Archive::new(decoder); - archive.unpack(dest).map_err(|e| { + let file = File::create(archive_path) + .map_err(|e| format!("create {}: {e}", archive_path.display()))?; + let writer = BufWriter::new(file); + let mut builder = tar::Builder::new(writer); + append_rootfs_tree_to_archive(&mut builder, source, Path::new("")).map_err(|e| { format!( - "extract {label} rootfs tarball into {}: {e}", - dest.display() + "archive {} into {}: {e}", + source.display(), + archive_path.display() ) - }) + })?; + builder + .finish() + .map_err(|e| format!("finalize {}: {e}", archive_path.display())) +} + +fn append_rootfs_tree_to_archive( + builder: &mut tar::Builder>, + source: &Path, + archive_prefix: &Path, +) -> Result<(), String> { + let mut entries = fs::read_dir(source) + .map_err(|e| format!("read {}: {e}", source.display()))? 
+ .collect::, _>>() + .map_err(|e| format!("read {}: {e}", source.display()))?; + entries.sort_by_key(|entry| entry.file_name()); + + for entry in entries { + let entry_name = entry.file_name(); + let source_path = entry.path(); + let archive_path = if archive_prefix.as_os_str().is_empty() { + entry_name.into() + } else { + archive_prefix.join(entry_name) + }; + let metadata = fs::symlink_metadata(&source_path) + .map_err(|e| format!("stat {}: {e}", source_path.display()))?; + let file_type = metadata.file_type(); + + if file_type.is_dir() { + builder + .append_dir(&archive_path, &source_path) + .map_err(|e| format!("append dir {}: {e}", source_path.display()))?; + append_rootfs_tree_to_archive(builder, &source_path, &archive_path)?; + continue; + } + + if file_type.is_file() { + let mut file = File::open(&source_path) + .map_err(|e| format!("open {}: {e}", source_path.display()))?; + builder + .append_file(&archive_path, &mut file) + .map_err(|e| format!("append file {}: {e}", source_path.display()))?; + continue; + } + + if file_type.is_symlink() { + append_symlink_to_archive(builder, &source_path, &archive_path, &metadata)?; + continue; + } + + return Err(format!( + "unsupported rootfs entry type at {}", + source_path.display() + )); + } + + Ok(()) +} + +fn append_symlink_to_archive( + builder: &mut tar::Builder>, + source_path: &Path, + archive_path: &Path, + metadata: &fs::Metadata, +) -> Result<(), String> { + let target = fs::read_link(source_path) + .map_err(|e| format!("readlink {}: {e}", source_path.display()))?; + let mut header = tar::Header::new_gnu(); + header.set_metadata(metadata); + header.set_size(0); + header.set_cksum(); + builder + .append_link(&mut header, archive_path, target) + .map_err(|e| format!("append symlink {}: {e}", source_path.display())) } fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { @@ -103,6 +167,8 @@ fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { .map_err(|e| format!("chmod {}: {e}", 
init_path.display()))?; } + ensure_supervisor_binary(rootfs)?; + let opt_dir = rootfs.join("opt/openshell"); fs::create_dir_all(&opt_dir).map_err(|e| format!("create {}: {e}", opt_dir.display()))?; fs::write(opt_dir.join(".rootfs-type"), "sandbox\n") @@ -114,6 +180,19 @@ fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { Ok(()) } +pub fn validate_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { + require_rootfs_path(rootfs, SANDBOX_GUEST_INIT_PATH)?; + require_rootfs_path(rootfs, "/opt/openshell/bin/openshell-sandbox")?; + require_any_rootfs_path(rootfs, &["/bin/bash"])?; + require_any_rootfs_path(rootfs, &["/bin/mount", "/usr/bin/mount"])?; + require_any_rootfs_path( + rootfs, + &["/sbin/ip", "/usr/sbin/ip", "/bin/ip", "/usr/bin/ip"], + )?; + require_any_rootfs_path(rootfs, &["/bin/sed", "/usr/bin/sed"])?; + Ok(()) +} + fn ensure_sandbox_guest_user(rootfs: &Path) -> Result<(), String> { const SANDBOX_UID: u32 = 10001; const SANDBOX_GID: u32 = 10001; @@ -167,6 +246,62 @@ fn ensure_line_in_file( fs::write(path, contents).map_err(|e| format!("write {}: {e}", path.display())) } +fn ensure_supervisor_binary(rootfs: &Path) -> Result<(), String> { + let path = rootfs.join(SANDBOX_SUPERVISOR_PATH.trim_start_matches('/')); + if SUPERVISOR.is_empty() { + if !path.exists() { + return Err( + "sandbox supervisor not embedded. 
Build openshell-driver-vm with OPENSHELL_VM_RUNTIME_COMPRESSED_DIR set and run `mise run vm:setup && mise run vm:supervisor` first" + .to_string(), + ); + } + } else { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).map_err(|e| format!("create {}: {e}", parent.display()))?; + } + + let supervisor = zstd::decode_all(Cursor::new(SUPERVISOR)) + .map_err(|e| format!("decompress supervisor: {e}"))?; + fs::write(&path, supervisor).map_err(|e| format!("write {}: {e}", path.display()))?; + } + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + fs::set_permissions(&path, fs::Permissions::from_mode(0o755)) + .map_err(|e| format!("chmod {}: {e}", path.display()))?; + } + + Ok(()) +} + +fn require_rootfs_path(rootfs: &Path, relative: &str) -> Result<(), String> { + let candidate = rootfs.join(relative.trim_start_matches('/')); + if candidate.exists() { + Ok(()) + } else { + Err(format!( + "prepared rootfs is missing {}", + candidate.display() + )) + } +} + +fn require_any_rootfs_path(rootfs: &Path, candidates: &[&str]) -> Result<(), String> { + if candidates + .iter() + .any(|candidate| rootfs.join(candidate.trim_start_matches('/')).exists()) + { + Ok(()) + } else { + Err(format!( + "prepared rootfs is missing one of: {}", + candidates.join(", ") + )) + } +} + fn remove_rootfs_path(rootfs: &Path, relative: &str) -> Result<(), String> { let path = rootfs.join(relative); if !path.exists() { @@ -198,9 +333,15 @@ mod tests { fs::create_dir_all(rootfs.join("var/lib/rancher")).expect("create var/lib/rancher"); fs::create_dir_all(rootfs.join("opt/openshell/charts")).expect("create charts"); fs::create_dir_all(rootfs.join("opt/openshell/manifests")).expect("create manifests"); + fs::create_dir_all(rootfs.join("opt/openshell/bin")).expect("create openshell bin"); fs::write(rootfs.join("usr/local/bin/k3s"), b"k3s").expect("write k3s"); fs::write(rootfs.join("usr/local/bin/kubectl"), b"kubectl").expect("write kubectl"); 
fs::write(rootfs.join("opt/openshell/.initialized"), b"yes").expect("write initialized"); + fs::write( + rootfs.join("opt/openshell/bin/openshell-sandbox"), + b"sandbox", + ) + .expect("write openshell-sandbox"); fs::write( rootfs.join("etc/passwd"), "root:x:0:0:root:/root:/bin/bash\n", @@ -208,8 +349,15 @@ mod tests { .expect("write passwd"); fs::write(rootfs.join("etc/group"), "root:x:0:\n").expect("write group"); fs::write(rootfs.join("etc/hosts"), "127.0.0.1 localhost\n").expect("write hosts"); + fs::create_dir_all(rootfs.join("bin")).expect("create bin"); + fs::create_dir_all(rootfs.join("sbin")).expect("create sbin"); + fs::write(rootfs.join("bin/bash"), b"bash").expect("write bash"); + fs::write(rootfs.join("bin/mount"), b"mount").expect("write mount"); + fs::write(rootfs.join("bin/sed"), b"sed").expect("write sed"); + fs::write(rootfs.join("sbin/ip"), b"ip").expect("write ip"); prepare_sandbox_rootfs(&rootfs).expect("prepare sandbox rootfs"); + validate_sandbox_rootfs(&rootfs).expect("validate sandbox rootfs"); assert!(!rootfs.join("usr/local/bin/k3s").exists()); assert!(!rootfs.join("usr/local/bin/kubectl").exists()); @@ -236,6 +384,37 @@ mod tests { let _ = fs::remove_dir_all(&dir); } + #[cfg(unix)] + #[test] + fn create_rootfs_archive_preserves_broken_symlinks() { + let dir = unique_temp_dir(); + let rootfs = dir.join("rootfs"); + let extracted = dir.join("extracted"); + let archive = dir.join("rootfs.tar"); + + fs::create_dir_all(rootfs.join("etc")).expect("create etc"); + fs::write(rootfs.join("etc/hosts"), "127.0.0.1 localhost\n").expect("write hosts"); + std::os::unix::fs::symlink("/proc/self/mounts", rootfs.join("etc/mtab")) + .expect("create symlink"); + + create_rootfs_archive_from_dir(&rootfs, &archive).expect("archive rootfs"); + extract_rootfs_archive_to(&archive, &extracted).expect("extract rootfs"); + + let extracted_link = extracted.join("etc/mtab"); + assert!( + fs::symlink_metadata(&extracted_link) + .unwrap() + .file_type() + 
.is_symlink() + ); + assert_eq!( + fs::read_link(&extracted_link).expect("read extracted symlink"), + PathBuf::from("/proc/self/mounts") + ); + + let _ = fs::remove_dir_all(&dir); + } + fn unique_temp_dir() -> PathBuf { static COUNTER: AtomicU64 = AtomicU64::new(0); let nanos = SystemTime::now() diff --git a/crates/openshell-driver-vm/start.sh b/crates/openshell-driver-vm/start.sh index d98bb7b91..b2eeba9d2 100755 --- a/crates/openshell-driver-vm/start.sh +++ b/crates/openshell-driver-vm/start.sh @@ -15,28 +15,21 @@ if [ -n "${SUDO_USER:-}" ]; then fi ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true CLI_BIN="${ROOT}/scripts/bin/openshell" -COMPRESSED_DIR="${ROOT}/target/vm-runtime-compressed" -SERVER_PORT="${OPENSHELL_SERVER_PORT:-8080}" -# Keep the driver socket path under AF_UNIX SUN_LEN on macOS. +ENV_FILE="${ROOT}/.env" +COMPRESSED_DIR_DEFAULT="${ROOT}/target/vm-runtime-compressed" +COMPRESSED_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${COMPRESSED_DIR_DEFAULT}}" +SERVER_PORT_REQUESTED="${OPENSHELL_SERVER_PORT:-${GATEWAY_PORT:-}}" +SERVER_PORT="${SERVER_PORT_REQUESTED:-}" STATE_DIR_ROOT="${OPENSHELL_VM_DRIVER_STATE_ROOT:-/tmp}" -STATE_LABEL_RAW="${OPENSHELL_VM_INSTANCE:-port-${SERVER_PORT}}" -STATE_LABEL="$(printf '%s' "${STATE_LABEL_RAW}" | tr -cs '[:alnum:]._-' '-')" -if [ -z "${STATE_LABEL}" ]; then - STATE_LABEL="port-${SERVER_PORT}" -fi -STATE_DIR_DEFAULT="${STATE_DIR_ROOT}/openshell-vm-driver-dev-${USER:-user}-${STATE_LABEL}" -STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${STATE_DIR_DEFAULT}}" -DB_PATH_DEFAULT="${STATE_DIR}/openshell.db" VM_HOST_GATEWAY_DEFAULT="${OPENSHELL_VM_HOST_GATEWAY:-host.containers.internal}" -LOCAL_GATEWAY_ENDPOINT_DEFAULT="http://127.0.0.1:${SERVER_PORT}" -LOCAL_GATEWAY_ENDPOINT="${OPENSHELL_VM_LOCAL_GATEWAY_ENDPOINT:-${LOCAL_GATEWAY_ENDPOINT_DEFAULT}}" -GATEWAY_NAME_DEFAULT="vm-driver-${STATE_LABEL}" 
-GATEWAY_NAME="${OPENSHELL_VM_GATEWAY_NAME:-${GATEWAY_NAME_DEFAULT}}" DRIVER_DIR_DEFAULT="${ROOT}/target/debug" DRIVER_DIR="${OPENSHELL_DRIVER_DIR:-${DRIVER_DIR_DEFAULT}}" -export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${COMPRESSED_DIR}}" +normalize_name() { + printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g' | sed 's/--*/-/g' | sed 's/^-//;s/-$//' +} for arg in "$@"; do if [ "${arg}" = "--gpu" ]; then @@ -45,7 +38,54 @@ for arg in "$@"; do fi done -mkdir -p "${STATE_DIR}" +has_env_key() { + local key=$1 + [ -f "${ENV_FILE}" ] || return 1 + grep -Eq "^[[:space:]]*(export[[:space:]]+)?${key}=" "${ENV_FILE}" +} + +append_env_if_missing() { + local key=$1 + local value=$2 + if has_env_key "${key}"; then + return + fi + if [ -f "${ENV_FILE}" ] && [ -s "${ENV_FILE}" ]; then + if [ "$(tail -c1 "${ENV_FILE}" | wc -l)" -eq 0 ]; then + printf "\n" >>"${ENV_FILE}" + fi + fi + printf "%s=%s\n" "${key}" "${value}" >>"${ENV_FILE}" +} + +upsert_env_key() { + local key=$1 + local value=$2 + local tmp_file + + tmp_file="$(mktemp "${ENV_FILE}.tmp.XXXXXX")" + if [ -f "${ENV_FILE}" ]; then + awk -v key="${key}" -v value="${value}" ' + BEGIN { updated = 0 } + $0 ~ "^[[:space:]]*(export[[:space:]]+)?" key "=" { + if (!updated) { + print key "=" value + updated = 1 + } + next + } + { print } + END { + if (!updated) { + print key "=" value + } + } + ' "${ENV_FILE}" >"${tmp_file}" + else + printf "%s=%s\n" "${key}" "${value}" >"${tmp_file}" + fi + mv "${tmp_file}" "${ENV_FILE}" +} normalize_bool() { case "${1,,}" in @@ -58,6 +98,39 @@ normalize_bool() { esac } +port_is_in_use() { + local port=$1 + if command -v lsof >/dev/null 2>&1; then + lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1 + return $? + fi + + if command -v nc >/dev/null 2>&1; then + nc -z 127.0.0.1 "${port}" >/dev/null 2>&1 + return $? 
+ fi + + (echo >/dev/tcp/127.0.0.1/"${port}") >/dev/null 2>&1 +} + +pick_random_port() { + local lower=20000 + local upper=60999 + local attempts=256 + local port + + for _ in $(seq 1 "${attempts}"); do + port=$((RANDOM % (upper - lower + 1) + lower)) + if ! port_is_in_use "${port}"; then + echo "${port}" + return 0 + fi + done + + echo "ERROR: could not find a free port after ${attempts} attempts." >&2 + return 1 +} + check_supervisor_cross_toolchain() { # The sandbox supervisor inside the guest is always Linux. On non-Linux # hosts (macOS) and on Linux hosts with a different arch than the guest, @@ -90,23 +163,54 @@ check_supervisor_cross_toolchain() { fi } -if [ ! -s "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}/rootfs.tar.zst" ]; then - check_supervisor_cross_toolchain - echo "==> Building base VM rootfs tarball" - mise run vm:rootfs -- --base +if [ -n "${SERVER_PORT_REQUESTED}" ]; then + if port_is_in_use "${SERVER_PORT}"; then + echo "ERROR: requested gateway port ${SERVER_PORT} is already in use." >&2 + echo " Update .env GATEWAY_PORT or override it for one run:" >&2 + echo " OPENSHELL_SERVER_PORT= mise run gateway:vm" >&2 + exit 1 + fi +else + SERVER_PORT="$(pick_random_port)" + append_env_if_missing "GATEWAY_PORT" "${SERVER_PORT}" fi -if [ "${OPENSHELL_VM_GPU:-}" = "true" ] && [ ! -s "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}/rootfs-gpu.tar.zst" ]; then - check_supervisor_cross_toolchain - echo "==> Building GPU VM rootfs tarball" - mise run vm:rootfs -- --gpu +GATEWAY_NAME_DEFAULT="$(basename "${ROOT}")" +GATEWAY_NAME="${OPENSHELL_VM_GATEWAY_NAME:-${GATEWAY_NAME_DEFAULT}}" +if [ -z "${GATEWAY_NAME}" ]; then + GATEWAY_NAME="openshell" +fi + +# Keep the driver socket path under AF_UNIX SUN_LEN on macOS. 
+STATE_LABEL_RAW="${OPENSHELL_VM_INSTANCE:-$(normalize_name "${GATEWAY_NAME}")}" +STATE_LABEL="$(printf '%s' "${STATE_LABEL_RAW}" | tr -cs '[:alnum:]._-' '-')" +if [ -z "${STATE_LABEL}" ]; then + STATE_LABEL="gateway" fi +STATE_DIR_DEFAULT="${STATE_DIR_ROOT}/openshell-vm-driver-dev-${USER:-user}-${STATE_LABEL}" +STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${STATE_DIR_DEFAULT}}" +DB_PATH_DEFAULT="${STATE_DIR}/openshell.db" +LOCAL_GATEWAY_ENDPOINT_DEFAULT="http://127.0.0.1:${SERVER_PORT}" +LOCAL_GATEWAY_ENDPOINT="${OPENSHELL_VM_LOCAL_GATEWAY_ENDPOINT:-${LOCAL_GATEWAY_ENDPOINT_DEFAULT}}" + +export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${COMPRESSED_DIR}" +export OPENSHELL_GATEWAY="${GATEWAY_NAME}" -if [ ! -s "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}/rootfs.tar.zst" ] || ! find "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}" -maxdepth 1 -name 'libkrun*.zst' | grep -q .; then +upsert_env_key "OPENSHELL_GATEWAY" "${GATEWAY_NAME}" + +mkdir -p "${STATE_DIR}" + +if [ ! -d "${COMPRESSED_DIR}" ] || ! find "${COMPRESSED_DIR}" -maxdepth 1 -name 'libkrun*.zst' | grep -q . || [ ! -f "${COMPRESSED_DIR}/gvproxy.zst" ]; then echo "==> Preparing embedded VM runtime" mise run vm:setup fi +if [ ! 
-f "${COMPRESSED_DIR}/openshell-sandbox.zst" ]; then + check_supervisor_cross_toolchain + echo "==> Building bundled VM supervisor" + mise run vm:supervisor +fi + echo "==> Building gateway and VM compute driver" cargo build -p openshell-server -p openshell-driver-vm @@ -123,7 +227,9 @@ export OPENSHELL_DISABLE_TLS="$(normalize_bool "${OPENSHELL_DISABLE_TLS:-true}") export OPENSHELL_DB_URL="${OPENSHELL_DB_URL:-sqlite:${DB_PATH_DEFAULT}}" export OPENSHELL_DRIVERS="${OPENSHELL_DRIVERS:-vm}" export OPENSHELL_DRIVER_DIR="${DRIVER_DIR}" +export OPENSHELL_SERVER_PORT="${SERVER_PORT}" export OPENSHELL_GRPC_ENDPOINT="${OPENSHELL_GRPC_ENDPOINT:-http://${VM_HOST_GATEWAY_DEFAULT}:${SERVER_PORT}}" +export OPENSHELL_SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-${COMMUNITY_SANDBOX_IMAGE:-}}" export OPENSHELL_SSH_GATEWAY_HOST="${OPENSHELL_SSH_GATEWAY_HOST:-127.0.0.1}" export OPENSHELL_SSH_GATEWAY_PORT="${OPENSHELL_SSH_GATEWAY_PORT:-${SERVER_PORT}}" export OPENSHELL_SSH_HANDSHAKE_SECRET="${OPENSHELL_SSH_HANDSHAKE_SECRET:-dev-vm-driver-secret}" @@ -142,24 +248,40 @@ if [ -z "${OPENSHELL_VM_RUNTIME_DIR:-}" ]; then fi fi -echo "==> Registering gateway" -echo " Name: ${GATEWAY_NAME}" -echo " Endpoint: ${LOCAL_GATEWAY_ENDPOINT}" -echo " Driver: ${OPENSHELL_DRIVER_DIR}/openshell-driver-vm" - -# GPU passthrough requires root, but gateway config must be written to the -# real user's home directory — not /root/.config/openshell/. -# Unset XDG_CONFIG_HOME so the CLI falls back to $HOME/.config (sudo -u -# sets HOME correctly but may inherit XDG_CONFIG_HOME from the root env). +# Write gateway metadata to the invoking user's config dir, even under sudo. +# When running under sudo, $HOME points at /root and XDG_CONFIG_HOME may also +# be inherited from the root env; fall back to SUDO_USER's home directory so +# repo-local `scripts/bin/openshell` reads the same config. 
if [ -n "${SUDO_USER:-}" ]; then - sudo -u "${SUDO_USER}" env -u XDG_CONFIG_HOME "PATH=${PATH}" "${CLI_BIN}" gateway destroy --name "${GATEWAY_NAME}" 2>/dev/null || true - sudo -u "${SUDO_USER}" env -u XDG_CONFIG_HOME "PATH=${PATH}" "${CLI_BIN}" gateway add --name "${GATEWAY_NAME}" "${LOCAL_GATEWAY_ENDPOINT}" - sudo -u "${SUDO_USER}" env -u XDG_CONFIG_HOME "PATH=${PATH}" "${CLI_BIN}" gateway select "${GATEWAY_NAME}" + _sudo_home=$(getent passwd "${SUDO_USER}" | cut -d: -f6) + GATEWAY_CONFIG_BASE="${_sudo_home}/.config" else - "${CLI_BIN}" gateway destroy --name "${GATEWAY_NAME}" 2>/dev/null || true - "${CLI_BIN}" gateway add --name "${GATEWAY_NAME}" "${LOCAL_GATEWAY_ENDPOINT}" - "${CLI_BIN}" gateway select "${GATEWAY_NAME}" + GATEWAY_CONFIG_BASE="${XDG_CONFIG_HOME:-${HOME}/.config}" +fi +GATEWAY_METADATA_DIR="${GATEWAY_CONFIG_BASE}/openshell/gateways/${GATEWAY_NAME}" +mkdir -p "${GATEWAY_METADATA_DIR}" +cat >"${GATEWAY_METADATA_DIR}/metadata.json" </dev/null || true fi +echo "==> Gateway config" +echo " Name: ${GATEWAY_NAME}" +echo " Endpoint: ${LOCAL_GATEWAY_ENDPOINT}" +echo " .env: OPENSHELL_GATEWAY=${GATEWAY_NAME}" +echo " .env: GATEWAY_PORT=${SERVER_PORT}" +echo " Driver: ${OPENSHELL_DRIVER_DIR}/openshell-driver-vm" +echo " Image: ${OPENSHELL_SANDBOX_IMAGE}" +echo " Status: ${CLI_BIN} status" +echo " Create: ${CLI_BIN} sandbox create --name vm-test --from ubuntu:24.04" + echo "==> Starting OpenShell server with VM compute driver" exec "${ROOT}/target/debug/openshell-gateway" diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index d9d81eed7..f083b6999 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -334,6 +334,7 @@ async fn run_from_args(args: Args) -> Result<()> { let vm_config = VmComputeConfig { state_dir: args.vm_driver_state_dir, driver_dir: args.driver_dir, + default_image: config.sandbox_image.clone(), krun_log_level: args.vm_krun_log_level, vcpus: args.vm_vcpus, mem_mib: 
args.vm_mem_mib, diff --git a/crates/openshell-server/src/compute/vm.rs b/crates/openshell-server/src/compute/vm.rs index 844b14240..e5d2880ec 100644 --- a/crates/openshell-server/src/compute/vm.rs +++ b/crates/openshell-server/src/compute/vm.rs @@ -63,6 +63,9 @@ pub struct VmComputeConfig { /// falls back to its conventional install paths and sibling binary. pub driver_dir: Option, + /// Default sandbox image the driver should use when a request omits one. + pub default_image: String, + /// libkrun log level used by the VM driver helper. pub krun_log_level: u32, @@ -124,6 +127,7 @@ impl Default for VmComputeConfig { Self { state_dir: Self::default_state_dir(), driver_dir: None, + default_image: String::new(), krun_log_level: Self::default_krun_log_level(), vcpus: Self::default_vcpus(), mem_mib: Self::default_mem_mib(), @@ -303,6 +307,9 @@ pub async fn spawn( .arg("--openshell-endpoint") .arg(&config.grpc_endpoint); command.arg("--state-dir").arg(&vm_config.state_dir); + if !vm_config.default_image.trim().is_empty() { + command.arg("--default-image").arg(&vm_config.default_image); + } command .arg("--ssh-handshake-secret") .arg(&config.ssh_handshake_secret); diff --git a/deploy/docker/Dockerfile.driver-vm-macos b/deploy/docker/Dockerfile.driver-vm-macos index ac0aec952..1932905e3 100644 --- a/deploy/docker/Dockerfile.driver-vm-macos +++ b/deploy/docker/Dockerfile.driver-vm-macos @@ -8,7 +8,7 @@ # # openshell-driver-vm loads libkrun/libkrunfw at runtime via dlopen, so it # does NOT need Hypervisor.framework headers at build time. Pre-compressed -# runtime artifacts (libkrun, libkrunfw, gvproxy, rootfs) are injected via +# runtime artifacts (libkrun, libkrunfw, gvproxy, bundled supervisor) are injected via # the vm-runtime-compressed build context and embedded into the binary via # include_bytes!(). 
# diff --git a/docs/get-started/quickstart.mdx b/docs/get-started/quickstart.mdx index 2f26c7bfb..28d093ecb 100644 --- a/docs/get-started/quickstart.mdx +++ b/docs/get-started/quickstart.mdx @@ -107,6 +107,13 @@ For example, to pull the `base` image, run the following command: openshell sandbox create --from base ``` +You can also point `--from` at a local Dockerfile or directory on disk when +using a local gateway: + +```shell +openshell sandbox create --from ./my-sandbox-dir +``` + diff --git a/docs/sandboxes/community-sandboxes.mdx b/docs/sandboxes/community-sandboxes.mdx index 32668d3e5..80ce32360 100644 --- a/docs/sandboxes/community-sandboxes.mdx +++ b/docs/sandboxes/community-sandboxes.mdx @@ -15,7 +15,7 @@ own. Community sandboxes are ready-to-use environments published in the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) repository. Each sandbox bundles a Dockerfile, policy, optional skills, and startup scripts -into a single package that you can launch with one command. +into a single package that can be published as a pre-built sandbox image. ## Current Catalog @@ -40,12 +40,11 @@ When you pass `--from` with a community sandbox name, the CLI: 1. Resolves the name against the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) repository. -2. Pulls the Dockerfile, policy, skills, and any startup scripts. -3. Builds the container image locally. -4. Creates the sandbox with the bundled configuration applied. +2. Converts the catalog name into the published sandbox image reference. +3. Creates the sandbox with that image and the bundled community defaults. -You end up with a running sandbox whose image, policy, and tooling are all -preconfigured by the community package. +You end up with a running sandbox whose image and tooling are preconfigured by +the community package. 
### Other Sources @@ -58,12 +57,26 @@ The `--from` flag also accepts: openshell sandbox create --from ./my-sandbox-dir ``` + This local Dockerfile flow is supported only when the selected gateway runs on + the same machine as the CLI. The CLI builds the image in the local Docker + daemon. For local Kubernetes gateways it also imports that image into the + gateway container runtime. For local VM gateways, the CLI exports the built + image as a local rootfs tar artifact and the VM driver consumes that artifact. + - Container image references: Use an existing container image directly: ```shell openshell sandbox create --from my-registry.example.com/my-image:latest ``` + On the VM backend, the image becomes the base guest rootfs for that sandbox. + The VM driver prepares and caches a rewritten rootfs per immutable image + identity, so different VM sandboxes can run with different `--from` images at + the same time. VM images must remain base-compatible with the guest init path. + Prepared VM rootfs caches stay on disk until they are removed from the VM + driver state directory. Docker is not required for registry or community image + refs on the VM backend. + ## Contribute a Community Sandbox Each community sandbox is a directory under `sandboxes/` in the diff --git a/tasks/scripts/vm/build-supervisor-bundle.sh b/tasks/scripts/vm/build-supervisor-bundle.sh new file mode 100755 index 000000000..9e3995a33 --- /dev/null +++ b/tasks/scripts/vm/build-supervisor-bundle.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}" + +GUEST_ARCH="" +while [[ $# -gt 0 ]]; do + case "$1" in + --arch) + GUEST_ARCH="$2" + shift 2 + ;; + --arch=*) + GUEST_ARCH="${1#--arch=}" + shift + ;; + --help|-h) + echo "Usage: $0 [--arch aarch64|x86_64]" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + exit 1 + ;; + esac +done + +if [ -z "${GUEST_ARCH}" ]; then + case "$(uname -m)" in + aarch64|arm64) GUEST_ARCH="aarch64" ;; + x86_64|amd64) GUEST_ARCH="x86_64" ;; + *) + echo "ERROR: Unsupported host architecture: $(uname -m)" >&2 + echo " Use --arch aarch64 or --arch x86_64 to override." >&2 + exit 1 + ;; + esac +fi + +case "${GUEST_ARCH}" in + aarch64|arm64) + RUST_TARGET="aarch64-unknown-linux-gnu" + ;; + x86_64|amd64) + RUST_TARGET="x86_64-unknown-linux-gnu" + ;; + *) + echo "ERROR: Unsupported guest architecture: ${GUEST_ARCH}" >&2 + echo " Supported: aarch64, x86_64" >&2 + exit 1 + ;; +esac + +SUPERVISOR_BIN="${ROOT}/target/${RUST_TARGET}/release/openshell-sandbox" +SUPERVISOR_OUTPUT="${OUTPUT_DIR}/openshell-sandbox.zst" + +echo "==> Building openshell-sandbox supervisor bundle" +echo " Guest arch: ${GUEST_ARCH}" +echo " Rust target: ${RUST_TARGET}" +echo " Output: ${SUPERVISOR_OUTPUT}" + +mkdir -p "${OUTPUT_DIR}" + +SUPERVISOR_BUILD_LOG="$(mktemp -t openshell-supervisor-build.XXXXXX.log)" +run_supervisor_build() { + if command -v cargo-zigbuild >/dev/null 2>&1; then + cargo zigbuild --release -p openshell-sandbox --target "${RUST_TARGET}" \ + --manifest-path "${ROOT}/Cargo.toml" + else + echo " cargo-zigbuild not found, falling back to cargo build..." + cargo build --release -p openshell-sandbox --target "${RUST_TARGET}" \ + --manifest-path "${ROOT}/Cargo.toml" + fi +} + +if run_supervisor_build >"${SUPERVISOR_BUILD_LOG}" 2>&1; then + tail -5 "${SUPERVISOR_BUILD_LOG}" + rm -f "${SUPERVISOR_BUILD_LOG}" +else + status=$? + echo "ERROR: supervisor build failed. 
Full output:" >&2 + cat "${SUPERVISOR_BUILD_LOG}" >&2 + echo " (log saved at ${SUPERVISOR_BUILD_LOG})" >&2 + exit "${status}" +fi + +if [ ! -f "${SUPERVISOR_BIN}" ]; then + echo "ERROR: supervisor binary not found at ${SUPERVISOR_BIN}" >&2 + exit 1 +fi + +zstd -19 -T0 -f "${SUPERVISOR_BIN}" -o "${SUPERVISOR_OUTPUT}" + +echo "==> Bundled supervisor ready" +echo " Binary: $(du -sh "${SUPERVISOR_BIN}" | cut -f1)" +echo " Compressed: $(du -sh "${SUPERVISOR_OUTPUT}" | cut -f1)" diff --git a/tasks/scripts/vm/vm-setup.sh b/tasks/scripts/vm/vm-setup.sh index bccb7f754..7563819b9 100755 --- a/tasks/scripts/vm/vm-setup.sh +++ b/tasks/scripts/vm/vm-setup.sh @@ -21,6 +21,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${SCRIPT_DIR}/_lib.sh" ROOT="$(vm_lib_root)" +CLI_BIN="${ROOT}/scripts/bin/openshell" FROM_SOURCE="${FROM_SOURCE:-0}" @@ -126,6 +127,6 @@ echo "" echo "==> Setup complete!" echo " Compressed artifacts in: ${OUTPUT_DIR}" echo "" -echo "Next steps:" -echo " mise run vm:rootfs --base # build rootfs (requires Docker)" -echo " mise run gateway:vm # start openshell-gateway with the VM driver" +echo "After starting the gateway:" +echo " ${CLI_BIN} status" +echo " ${CLI_BIN} sandbox create --name vm-test --from ubuntu:24.04" diff --git a/tasks/vm.toml b/tasks/vm.toml index 2549f230f..410416000 100644 --- a/tasks/vm.toml +++ b/tasks/vm.toml @@ -5,6 +5,7 @@ # # Workflow: # mise run vm:setup # one-time: download pre-built runtime (~30s) +# mise run vm:supervisor # build the bundled sandbox supervisor # mise run gateway:vm # start openshell-gateway with the VM driver # mise run vm # build + run the standalone openshell-vm microVM # mise run vm:clean # wipe everything and start over @@ -38,6 +39,10 @@ run = [ description = "One-time setup: download (or build) the VM runtime" run = "tasks/scripts/vm/vm-setup.sh" +["vm:supervisor"] +description = "Build the bundled openshell-sandbox supervisor for openshell-driver-vm" +run = 
"tasks/scripts/vm/build-supervisor-bundle.sh" + ["vm:rootfs"] description = "Build the VM rootfs tarball (use -- --base for lightweight)" run = "tasks/scripts/vm/build-rootfs-tarball.sh" From 544d62c6da063a1684363838835dfe726786528e Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Sun, 26 Apr 2026 23:07:01 -0700 Subject: [PATCH 02/11] docs(vm): remove public-facing doc changes for VM rootfs work Revert docs/get-started/quickstart.mdx and docs/sandboxes/community-sandboxes.mdx to their pre-VM-rootfs state. Keep these changes internal until the feature is finalized. Signed-off-by: Drew Newberry --- docs/get-started/quickstart.mdx | 7 ------- docs/sandboxes/community-sandboxes.mdx | 25 ++++++------------------- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/docs/get-started/quickstart.mdx b/docs/get-started/quickstart.mdx index 28d093ecb..2f26c7bfb 100644 --- a/docs/get-started/quickstart.mdx +++ b/docs/get-started/quickstart.mdx @@ -107,13 +107,6 @@ For example, to pull the `base` image, run the following command: openshell sandbox create --from base ``` -You can also point `--from` at a local Dockerfile or directory on disk when -using a local gateway: - -```shell -openshell sandbox create --from ./my-sandbox-dir -``` - diff --git a/docs/sandboxes/community-sandboxes.mdx b/docs/sandboxes/community-sandboxes.mdx index 80ce32360..32668d3e5 100644 --- a/docs/sandboxes/community-sandboxes.mdx +++ b/docs/sandboxes/community-sandboxes.mdx @@ -15,7 +15,7 @@ own. Community sandboxes are ready-to-use environments published in the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) repository. Each sandbox bundles a Dockerfile, policy, optional skills, and startup scripts -into a single package that can be published as a pre-built sandbox image. +into a single package that you can launch with one command. ## Current Catalog @@ -40,11 +40,12 @@ When you pass `--from` with a community sandbox name, the CLI: 1. 
Resolves the name against the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) repository. -2. Converts the catalog name into the published sandbox image reference. -3. Creates the sandbox with that image and the bundled community defaults. +2. Pulls the Dockerfile, policy, skills, and any startup scripts. +3. Builds the container image locally. +4. Creates the sandbox with the bundled configuration applied. -You end up with a running sandbox whose image and tooling are preconfigured by -the community package. +You end up with a running sandbox whose image, policy, and tooling are all +preconfigured by the community package. ### Other Sources @@ -57,26 +58,12 @@ The `--from` flag also accepts: openshell sandbox create --from ./my-sandbox-dir ``` - This local Dockerfile flow is supported only when the selected gateway runs on - the same machine as the CLI. The CLI builds the image in the local Docker - daemon. For local Kubernetes gateways it also imports that image into the - gateway container runtime. For local VM gateways, the CLI exports the built - image as a local rootfs tar artifact and the VM driver consumes that artifact. - - Container image references: Use an existing container image directly: ```shell openshell sandbox create --from my-registry.example.com/my-image:latest ``` - On the VM backend, the image becomes the base guest rootfs for that sandbox. - The VM driver prepares and caches a rewritten rootfs per immutable image - identity, so different VM sandboxes can run with different `--from` images at - the same time. VM images must remain base-compatible with the guest init path. - Prepared VM rootfs caches stay on disk until they are removed from the VM - driver state directory. Docker is not required for registry or community image - refs on the VM backend. 
- ## Contribute a Community Sandbox Each community sandbox is a directory under `sandboxes/` in the From b25c143a2b65d7d2cbd901084f2a94a999ef4e74 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Mon, 27 Apr 2026 08:12:14 -0700 Subject: [PATCH 03/11] wip --- .../scripts/openshell-vm-sandbox-init.sh | 9 +- crates/openshell-driver-vm/start.sh | 20 +++- tasks/scripts/vm/build-supervisor-bundle.sh | 95 +++++++++++++++++-- 3 files changed, 115 insertions(+), 9 deletions(-) diff --git a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh index 1c009a7f1..8c38abdb4 100644 --- a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh +++ b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh @@ -96,15 +96,20 @@ tcp_probe() { } ensure_host_gateway_aliases() { + # Pin every host-gateway alias to the gvproxy gateway IP via /etc/hosts so the + # supervisor can reach the OpenShell server even when gvproxy's built-in DNS + # is not in resolv.conf (e.g. when DHCP fails and we fall back to 8.8.8.8). 
local hosts_tmp="/tmp/openshell-hosts.$$" + local aliases="host.openshell.internal host.containers.internal host.docker.internal gateway.containers.internal" + local filter='(^|[[:space:]])(host\.openshell\.internal|host\.containers\.internal|host\.docker\.internal|gateway\.containers\.internal)([[:space:]]|$)' if [ -f /etc/hosts ]; then - grep -vE '(^|[[:space:]])host\.openshell\.internal([[:space:]]|$)' /etc/hosts > "$hosts_tmp" || true + grep -vE "$filter" /etc/hosts > "$hosts_tmp" || true else : > "$hosts_tmp" fi - printf '%s host.openshell.internal\n' "$GATEWAY_IP" >> "$hosts_tmp" + printf '%s %s\n' "$GATEWAY_IP" "$aliases" >> "$hosts_tmp" cat "$hosts_tmp" > /etc/hosts rm -f "$hosts_tmp" } diff --git a/crates/openshell-driver-vm/start.sh b/crates/openshell-driver-vm/start.sh index b2eeba9d2..c81cc22ed 100755 --- a/crates/openshell-driver-vm/start.sh +++ b/crates/openshell-driver-vm/start.sh @@ -163,6 +163,17 @@ check_supervisor_cross_toolchain() { fi } +build_gateway_and_driver() { + local rustc_wrapper_mode="${1:-default}" + local cargo_prefix=() + + if [ "${rustc_wrapper_mode}" = "without-rustc-wrapper" ]; then + cargo_prefix=(env -u RUSTC_WRAPPER) + fi + + "${cargo_prefix[@]}" cargo build -p openshell-server -p openshell-driver-vm +} + if [ -n "${SERVER_PORT_REQUESTED}" ]; then if port_is_in_use "${SERVER_PORT}"; then echo "ERROR: requested gateway port ${SERVER_PORT} is already in use." >&2 @@ -212,7 +223,14 @@ if [ ! -f "${COMPRESSED_DIR}/openshell-sandbox.zst" ]; then fi echo "==> Building gateway and VM compute driver" -cargo build -p openshell-server -p openshell-driver-vm +if ! build_gateway_and_driver; then + if [ -n "${RUSTC_WRAPPER:-}" ]; then + echo "WARNING: gateway/driver build failed through RUSTC_WRAPPER=${RUSTC_WRAPPER}; retrying without RUSTC_WRAPPER." 
>&2 + build_gateway_and_driver without-rustc-wrapper + else + exit 1 + fi +fi if [ "$(uname -s)" = "Darwin" ]; then echo "==> Codesigning VM compute driver" diff --git a/tasks/scripts/vm/build-supervisor-bundle.sh b/tasks/scripts/vm/build-supervisor-bundle.sh index 9e3995a33..90f5b517d 100755 --- a/tasks/scripts/vm/build-supervisor-bundle.sh +++ b/tasks/scripts/vm/build-supervisor-bundle.sh @@ -58,34 +58,117 @@ esac SUPERVISOR_BIN="${ROOT}/target/${RUST_TARGET}/release/openshell-sandbox" SUPERVISOR_OUTPUT="${OUTPUT_DIR}/openshell-sandbox.zst" +ensure_build_nofile_limit() { + local desired="${OPENSHELL_VM_BUILD_NOFILE_LIMIT:-8192}" + local minimum=1024 + local current="" + local hard="" + local target="" + + [ "$(uname -s)" = "Darwin" ] || return 0 + command -v cargo-zigbuild >/dev/null 2>&1 || return 0 + + current="$(ulimit -n 2>/dev/null || echo "")" + case "${current}" in + ''|*[!0-9]*) + return 0 + ;; + esac + + if [ "${current}" -ge "${desired}" ]; then + return 0 + fi + + hard="$(ulimit -Hn 2>/dev/null || echo "")" + target="${desired}" + case "${hard}" in + ''|unlimited|infinity) + ;; + *[!0-9]*) + ;; + *) + if [ "${hard}" -lt "${target}" ]; then + target="${hard}" + fi + ;; + esac + + if [ "${target}" -gt "${current}" ] && ulimit -n "${target}" 2>/dev/null; then + echo "==> Raised open file limit for cargo-zigbuild: ${current} -> $(ulimit -n)" + fi + + current="$(ulimit -n 2>/dev/null || echo "${current}")" + case "${current}" in + ''|*[!0-9]*) + return 0 + ;; + esac + + if [ "${current}" -lt "${desired}" ]; then + echo "WARNING: Open file limit is ${current}; cargo-zigbuild is more reliable at ${desired}+ on macOS." + fi + + if [ "${current}" -lt "${minimum}" ]; then + echo "ERROR: Open file limit (${current}) is too low for cargo-zigbuild on macOS." >&2 + echo " Run: ulimit -n ${desired}" >&2 + echo " Then re-run this script." 
>&2 + exit 1 + fi +} + echo "==> Building openshell-sandbox supervisor bundle" echo " Guest arch: ${GUEST_ARCH}" echo " Rust target: ${RUST_TARGET}" echo " Output: ${SUPERVISOR_OUTPUT}" mkdir -p "${OUTPUT_DIR}" +ensure_build_nofile_limit SUPERVISOR_BUILD_LOG="$(mktemp -t openshell-supervisor-build.XXXXXX.log)" run_supervisor_build() { + local rustc_wrapper_mode="${1:-default}" + local cargo_prefix=() + + if [ "${rustc_wrapper_mode}" = "without-rustc-wrapper" ]; then + cargo_prefix=(env -u RUSTC_WRAPPER) + fi + if command -v cargo-zigbuild >/dev/null 2>&1; then - cargo zigbuild --release -p openshell-sandbox --target "${RUST_TARGET}" \ + "${cargo_prefix[@]}" cargo zigbuild --release -p openshell-sandbox --target "${RUST_TARGET}" \ --manifest-path "${ROOT}/Cargo.toml" else echo " cargo-zigbuild not found, falling back to cargo build..." - cargo build --release -p openshell-sandbox --target "${RUST_TARGET}" \ + "${cargo_prefix[@]}" cargo build --release -p openshell-sandbox --target "${RUST_TARGET}" \ --manifest-path "${ROOT}/Cargo.toml" fi } +print_build_failure() { + echo "ERROR: supervisor build failed. Full output:" >&2 + cat "${SUPERVISOR_BUILD_LOG}" >&2 + echo " (log saved at ${SUPERVISOR_BUILD_LOG})" >&2 +} + if run_supervisor_build >"${SUPERVISOR_BUILD_LOG}" 2>&1; then tail -5 "${SUPERVISOR_BUILD_LOG}" rm -f "${SUPERVISOR_BUILD_LOG}" else status=$? - echo "ERROR: supervisor build failed. Full output:" >&2 - cat "${SUPERVISOR_BUILD_LOG}" >&2 - echo " (log saved at ${SUPERVISOR_BUILD_LOG})" >&2 - exit "${status}" + if [ -n "${RUSTC_WRAPPER:-}" ] && grep -Eq 'sccache: encountered fatal error|Too many open files|os error 24' "${SUPERVISOR_BUILD_LOG}"; then + echo "WARNING: supervisor build failed through RUSTC_WRAPPER=${RUSTC_WRAPPER}; retrying without RUSTC_WRAPPER." 
>&2 + : >"${SUPERVISOR_BUILD_LOG}" + if run_supervisor_build without-rustc-wrapper >"${SUPERVISOR_BUILD_LOG}" 2>&1; then + tail -5 "${SUPERVISOR_BUILD_LOG}" + rm -f "${SUPERVISOR_BUILD_LOG}" + else + status=$? + print_build_failure + exit "${status}" + fi + else + print_build_failure + exit "${status}" + fi fi if [ ! -f "${SUPERVISOR_BIN}" ]; then From 2c4d8977c0ff871ca6ea869382e8a9cea993c414 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Mon, 27 Apr 2026 20:59:45 -0700 Subject: [PATCH 04/11] wip: gateway:vm refactor + driver-scoped ssh handshake check --- architecture/custom-vm-runtime.md | 11 +- crates/openshell-driver-vm/README.md | 67 ++-- .../scripts/openshell-vm-sandbox-init.sh | 52 ++- crates/openshell-driver-vm/src/driver.rs | 65 +++- crates/openshell-driver-vm/src/runtime.rs | 16 +- crates/openshell-driver-vm/start.sh | 305 ------------------ crates/openshell-server/src/compute/vm.rs | 11 +- crates/openshell-server/src/lib.rs | 75 +++-- e2e/rust/e2e-vm.sh | 9 +- tasks/gateway.toml | 4 + tasks/scripts/gateway-vm.sh | 225 +++++++++++++ tasks/vm.toml | 8 +- 12 files changed, 441 insertions(+), 407 deletions(-) delete mode 100755 crates/openshell-driver-vm/start.sh create mode 100755 tasks/scripts/gateway-vm.sh diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index 18594d4af..3c721ae50 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -220,8 +220,8 @@ graph LR The `vm-runtime-.tar.zst` artifact is consumed by `openshell-driver-vm`'s `build.rs`, which embeds the library set into the binary via `include_bytes!()`. Setting `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` -at build time (wired up by `crates/openshell-driver-vm/start.sh`) points the -build at the staged artifacts. +at build time (wired up by `tasks/scripts/gateway-vm.sh`, registered as +`mise run gateway:vm`) points the build at the staged artifacts. 
## Kernel Config Fragment @@ -315,14 +315,15 @@ cross-compiled via osxcross (no macOS runner needed for the binary build — only for the kernel build). macOS driver binaries produced via osxcross are not codesigned. Development -builds are signed automatically by `crates/openshell-driver-vm/start.sh`; a -packaged release needs signing in CI. +builds are signed automatically by `tasks/scripts/gateway-vm.sh` +(registered as `mise run gateway:vm`); a packaged release needs signing in +CI. ## Rollout Strategy 1. Custom runtime is embedded by default when building `openshell-driver-vm` with `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` set (wired up by - `crates/openshell-driver-vm/start.sh`). + `tasks/scripts/gateway-vm.sh`). 2. The sandbox init script validates kernel capabilities at boot and fails fast if missing. 3. For development, override with `OPENSHELL_VM_RUNTIME_DIR` to use a local diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index def0ae4db..d42dea442 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -35,10 +35,15 @@ Sandbox guests execute `/opt/openshell/bin/openshell-sandbox` as PID 1 inside th mise run gateway:vm ``` -First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:supervisor` builds the bundled guest supervisor. Subsequent runs are cached. To keep the Unix socket path under macOS `SUN_LEN`, `mise run gateway:vm` and `start.sh` default the state dir to `/tmp/openshell-vm-driver-dev-$USER-/` (SQLite DB + per-sandbox rootfs + `compute-driver.sock`) unless `OPENSHELL_VM_DRIVER_STATE_DIR` is set. -By default the wrapper names the gateway after the repo directory, writes `OPENSHELL_GATEWAY=` into `.env`, and writes plaintext local gateway metadata under `~/.config/openshell/gateways//metadata.json` so repo-local `scripts/bin/openshell status` and `sandbox create` resolve to the VM gateway without an extra `gateway select`. 
When running under `sudo`, the wrapper uses `sudo -u $SUDO_USER` for the registration so the config is written under the invoking user's home directory. Re-runs are idempotent. -If neither `OPENSHELL_SERVER_PORT` nor `GATEWAY_PORT` is set, the wrapper picks a random free local port once and appends `GATEWAY_PORT=` to `.env`. Later runs reuse that port through `mise`'s env loading. If you set `OPENSHELL_SERVER_PORT` explicitly, the wrapper uses it for that run and still fails fast on conflicts. -It also exports `OPENSHELL_DRIVER_DIR=$PWD/target/debug` before starting the gateway so local dev runs use the freshly built `openshell-driver-vm` instead of an older installed copy from `~/.local/libexec/openshell` or `/usr/local/libexec`. +First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:supervisor` builds the bundled guest supervisor. Subsequent runs are cached. + +By default `mise run gateway:vm`: + +- Listens on plaintext HTTP at `127.0.0.1:18081`. +- Registers the CLI gateway `vm-dev` by writing `~/.config/openshell/gateways/vm-dev/metadata.json`. It does not modify the workspace `.env`. +- Persists the gateway SQLite DB under `.cache/gateway-vm/gateway.db`. +- Places the VM driver state (per-sandbox rootfs + `compute-driver.sock`) under `/tmp/openshell-vm-driver-$USER-vm-dev/` so the AF_UNIX socket path stays under macOS `SUN_LEN`. +- Passes `--driver-dir $PWD/target/debug` so the freshly built `openshell-driver-vm` is used instead of an older installed copy from `~/.local/libexec/openshell` or `/usr/local/libexec`. For GPU passthrough (VFIO), pass `-- --gpu` and run with root privileges: @@ -48,45 +53,38 @@ sudo -E env "PATH=$PATH" mise run gateway:vm -- --gpu See [`architecture/vm-gpu-sandbox-guide.md`](../../architecture/vm-gpu-sandbox-guide.md) for full GPU prerequisites and usage. 
-Override via environment: +Point the CLI at the gateway with one of: ```shell -OPENSHELL_SERVER_PORT=9090 \ -OPENSHELL_SSH_HANDSHAKE_SECRET=$(openssl rand -hex 32) \ -crates/openshell-driver-vm/start.sh +openshell --gateway vm-dev status +openshell gateway select vm-dev # then plain `openshell ` ``` -If you want to pin the project port instead of using the `.env` default: +Override defaults via environment: ```shell -GATEWAY_PORT=28080 mise run gateway:vm -``` +# custom port (fails fast if in use) +OPENSHELL_SERVER_PORT=18091 mise run gateway:vm -If you want a custom state-dir suffix instead of the repo-name default, set `OPENSHELL_VM_INSTANCE`: - -```shell -GATEWAY_PORT=28081 \ -OPENSHELL_VM_INSTANCE=feature-a \ -mise run gateway:vm -``` - -If you want a custom CLI gateway name instead of the repo directory, set `OPENSHELL_VM_GATEWAY_NAME`: - -```shell -GATEWAY_PORT=28082 \ +# custom CLI gateway name + namespace OPENSHELL_VM_GATEWAY_NAME=vm-feature-a \ +OPENSHELL_SANDBOX_NAMESPACE=vm-feature-a \ mise run gateway:vm + +# custom sandbox image +OPENSHELL_SANDBOX_IMAGE=ghcr.io/example/sandbox:latest mise run gateway:vm ``` Teardown: ```shell -rm -rf /tmp/openshell-vm-driver-dev-$USER-$(basename "$PWD" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g' | sed 's/--*/-/g' | sed 's/^-//;s/-$//') +rm -rf /tmp/openshell-vm-driver-$USER-vm-dev .cache/gateway-vm +rm -rf "${XDG_CONFIG_HOME:-$HOME/.config}/openshell/gateways/vm-dev" ``` ## Manual equivalent -If you want to drive the launch yourself instead of using `start.sh`: +If you want to drive the launch yourself instead of using `mise run gateway:vm` (i.e. `tasks/scripts/gateway-vm.sh`): ```shell # 1. Stage runtime artifacts + supervisor bundle into target/vm-runtime-compressed/ @@ -103,18 +101,17 @@ codesign \ --force -s - target/debug/openshell-driver-vm # 4. 
Start the gateway with the VM driver -mkdir -p /tmp/openshell-vm-driver-dev-$USER-port-8080 +mkdir -p /tmp/openshell-vm-driver-$USER-vm-dev .cache/gateway-vm target/debug/openshell-gateway \ --drivers vm \ --disable-tls \ - --database-url sqlite:/tmp/openshell-vm-driver-dev-$USER-port-8080/openshell.db \ + --db-url "sqlite:.cache/gateway-vm/gateway.db?mode=rwc" \ --driver-dir $PWD/target/debug \ + --sandbox-namespace vm-dev \ --sandbox-image \ - --grpc-endpoint http://host.containers.internal:8080 \ - --ssh-handshake-secret dev-vm-driver-secret \ - --ssh-gateway-host 127.0.0.1 \ - --ssh-gateway-port 8080 \ - --vm-driver-state-dir /tmp/openshell-vm-driver-dev-$USER-port-8080 + --grpc-endpoint http://host.containers.internal:18081 \ + --port 18081 \ + --vm-driver-state-dir /tmp/openshell-vm-driver-$USER-vm-dev ``` The gateway resolves `openshell-driver-vm` in this order: `--driver-dir`, conventional install locations (`~/.local/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`), then a sibling of the gateway binary. @@ -124,7 +121,7 @@ The gateway resolves `openshell-driver-vm` in this order: `--driver-dir`, conven | Flag | Env var | Default | Purpose | |---|---|---|---| | `--drivers vm` | `OPENSHELL_DRIVERS` | `kubernetes` | Select the VM compute driver. | -| `--grpc-endpoint URL` | `OPENSHELL_GRPC_ENDPOINT` | — | Required. URL the sandbox guest calls back to. Use a host alias that resolves to the gateway's host from inside the VM (`host.containers.internal` comes from gvproxy DNS; the guest init script also seeds `host.openshell.internal` to `192.168.127.1`). | +| `--grpc-endpoint URL` | `OPENSHELL_GRPC_ENDPOINT` | — | Required. URL the sandbox guest dials to reach the gateway. Use `http://host.containers.internal:` (or `host.docker.internal` / `host.openshell.internal`) so traffic flows through gvproxy's host-loopback NAT (HostIP `192.168.127.254` → host `127.0.0.1`). 
Loopback URLs like `http://127.0.0.1:` are rewritten automatically by the driver. The bare gateway IP (`192.168.127.1`) only carries gvproxy's own services and will not reach host-bound ports. | | `--vm-driver-state-dir DIR` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Per-sandbox rootfs, console logs, and the `compute-driver.sock` UDS. | | `--driver-dir DIR` | `OPENSHELL_DRIVER_DIR` | unset | Override the directory searched for `openshell-driver-vm`. | | `--vm-driver-vcpus N` | `OPENSHELL_VM_DRIVER_VCPUS` | `2` | vCPUs per sandbox. | @@ -154,7 +151,7 @@ Raise log verbosity for both processes: ```shell RUST_LOG=openshell_server=debug,openshell_driver_vm=debug \ - crates/openshell-driver-vm/start.sh + mise run gateway:vm ``` The VM guest's serial console is appended to `//console.log`. The `compute-driver.sock` lives at `/compute-driver.sock`; the gateway removes it on clean shutdown via `ManagedDriverProcess::drop`. @@ -177,4 +174,4 @@ The VM guest's serial console is appended to `//console.l ## TODOs - The gateway still configures the driver via CLI args; this will move to a gRPC bootstrap call so the driver interface is uniform across backends. See the `TODO(driver-abstraction)` notes in `crates/openshell-server/src/lib.rs` and `crates/openshell-server/src/compute/vm.rs`. -- macOS codesigning is handled by `start.sh`; a packaged release would need signing in CI. +- macOS codesigning is handled by `tasks/scripts/gateway-vm.sh`; a packaged release would need signing in CI. 
diff --git a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh index 8c38abdb4..063a75032 100644 --- a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh +++ b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh @@ -15,7 +15,20 @@ if [ -f /srv/openshell-env.sh ]; then fi BOOT_START=$(date +%s%3N 2>/dev/null || date +%s) +# gvisor-tap-vsock subnet layout: +# 192.168.127.1 — gateway: gvproxy's DNS / DHCP / HTTP API. Does NOT +# proxy arbitrary host ports. +# 192.168.127.254 — host-loopback: NAT-rewritten to host's 127.0.0.1 by +# gvproxy's TCP/UDP/ICMP forwarder. Use this address +# (or any of the host.* hostnames below) to reach a +# service the host is listening on. +# The host.containers.internal / host.docker.internal DNS records served +# by gvproxy's embedded resolver point at 192.168.127.254. We mirror that +# in /etc/hosts so the supervisor can reach the gateway even when +# gvproxy's DNS is not in resolv.conf (e.g. DHCP failed and we fell +# back to 8.8.8.8). GVPROXY_GATEWAY_IP="192.168.127.1" +GVPROXY_HOST_LOOPBACK_IP="192.168.127.254" GATEWAY_IP="$GVPROXY_GATEWAY_IP" # Parse kernel cmdline for GPU and TAP networking parameters @@ -96,11 +109,19 @@ tcp_probe() { } ensure_host_gateway_aliases() { - # Pin every host-gateway alias to the gvproxy gateway IP via /etc/hosts so the - # supervisor can reach the OpenShell server even when gvproxy's built-in DNS - # is not in resolv.conf (e.g. when DHCP fails and we fall back to 8.8.8.8). + # Seed /etc/hosts with the well-known gvproxy hostnames so the supervisor + # can reach the OpenShell server even when gvproxy's built-in DNS is not + # in resolv.conf (e.g. when DHCP fails and we fall back to 8.8.8.8). + # + # Critical distinction: host.* aliases point at the gvproxy *host-loopback* + # IP (192.168.127.254), not the gateway IP (192.168.127.1). 
Only the + # host-loopback IP carries NAT rewriting to the host's 127.0.0.1 — the + # gateway IP only listens on gvproxy's own service ports (DNS:53, DHCP, + # HTTP API:80). Pinning host.containers.internal to the gateway IP + # silently breaks guest→host port reachability for arbitrary ports. local hosts_tmp="/tmp/openshell-hosts.$$" - local aliases="host.openshell.internal host.containers.internal host.docker.internal gateway.containers.internal" + local host_aliases="host.openshell.internal host.containers.internal host.docker.internal" + local gateway_aliases="gateway.containers.internal" local filter='(^|[[:space:]])(host\.openshell\.internal|host\.containers\.internal|host\.docker\.internal|gateway\.containers\.internal)([[:space:]]|$)' if [ -f /etc/hosts ]; then @@ -109,7 +130,18 @@ ensure_host_gateway_aliases() { : > "$hosts_tmp" fi - printf '%s %s\n' "$GATEWAY_IP" "$aliases" >> "$hosts_tmp" + # In TAP/GPU mode, GATEWAY_IP is overridden to VM_NET_GW (the host-side + # of the TAP), and the gateway is reachable directly there. In gvproxy + # mode, host.openshell.internal etc. need GVPROXY_HOST_LOOPBACK_IP + # (192.168.127.254) which is gvproxy's host-NAT entry, while + # gateway.containers.internal points at the gvproxy gateway itself. + if [ "${GATEWAY_IP}" = "${GVPROXY_GATEWAY_IP}" ]; then + printf '%s %s\n' "$GVPROXY_HOST_LOOPBACK_IP" "$host_aliases" >> "$hosts_tmp" + printf '%s %s\n' "$GVPROXY_GATEWAY_IP" "$gateway_aliases" >> "$hosts_tmp" + else + # TAP networking: gateway and host are both reachable at GATEWAY_IP. + printf '%s %s %s\n' "$GATEWAY_IP" "$host_aliases" "$gateway_aliases" >> "$hosts_tmp" + fi cat "$hosts_tmp" > /etc/hosts rm -f "$hosts_tmp" } @@ -134,7 +166,15 @@ rewrite_openshell_endpoint_if_needed() { return 0 fi - for candidate in host.openshell.internal host.containers.internal host.docker.internal "$GATEWAY_IP"; do + # Probe candidates in preference order. 
Hostnames first for informative + # log output, then a bare IP as a final safety net. In gvproxy mode the + # bare IP is the host-loopback (192.168.127.254). In TAP/GPU mode it's + # the TAP host gateway. + local fallback_ip="$GVPROXY_HOST_LOOPBACK_IP" + if [ "${GATEWAY_IP}" != "${GVPROXY_GATEWAY_IP}" ]; then + fallback_ip="$GATEWAY_IP" + fi + for candidate in host.openshell.internal host.containers.internal host.docker.internal "$fallback_ip"; do if [ "$candidate" = "$host" ]; then continue fi diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 7d4dbb9f9..6e2b25656 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -52,8 +52,36 @@ const DRIVER_NAME: &str = "openshell-driver-vm"; const WATCH_BUFFER: usize = 256; const DEFAULT_VCPUS: u8 = 2; const DEFAULT_MEM_MIB: u32 = 2048; +/// gvproxy gateway IP — runs DNS, DHCP, and the gvproxy HTTP API. Does **not** +/// proxy arbitrary host ports. const GVPROXY_GATEWAY_IP: &str = "192.168.127.1"; +/// gvproxy host-loopback IP — gvproxy's TCP/UDP/ICMP forwarder NAT-rewrites +/// this destination to the host's `127.0.0.1` and dials out from the host +/// process. This is the only address that transparently reaches host-bound +/// services without explicit `expose` rules. +/// +/// See gvisor-tap-vsock `cmd/gvproxy/config.go` (default NAT entry +/// `HostIP -> 127.0.0.1`) and `pkg/services/forwarder/tcp.go` (NAT lookup +/// before `net.Dial`). +/// +/// Code paths route via `GVPROXY_HOST_LOOPBACK_ALIAS` (DNS / /etc/hosts) +/// instead so logs stay readable; this constant is kept for documentation +/// and parity with the guest init script. +#[allow(dead_code)] +const GVPROXY_HOST_LOOPBACK_IP: &str = "192.168.127.254"; const OPENSHELL_HOST_GATEWAY_ALIAS: &str = "host.openshell.internal"; +/// Hostname gvproxy resolves (via its embedded DNS) to the host-loopback IP. 
+/// +/// We rewrite loopback URLs to this hostname rather than the bare IP because: +/// * the guest init script seeds /etc/hosts with the same mapping, so it +/// resolves even when gvproxy's DNS is not in resolv.conf; +/// * keeping a recognisable hostname makes log messages clearer than a bare +/// 192.168.127.254 reference; +/// * `host.docker.internal` works the same way for Docker-flavoured tooling. +/// +/// Both names ultimately route through the gvproxy NAT path on +/// `GVPROXY_HOST_LOOPBACK_IP` — they do **not** go through the gateway IP. +const GVPROXY_HOST_LOOPBACK_ALIAS: &str = "host.containers.internal"; const GUEST_SSH_SOCKET_PATH: &str = "/run/openshell/ssh.sock"; const GUEST_TLS_DIR: &str = "/opt/openshell/tls"; const GUEST_TLS_CA_PATH: &str = "/opt/openshell/tls/ca.crt"; @@ -1660,6 +1688,25 @@ fn merged_environment(sandbox: &Sandbox) -> HashMap { environment } +/// Rewrites loopback host references in a gateway URL to a hostname the guest +/// can reach via gvproxy. +/// +/// The driver receives the gateway endpoint from `--openshell-endpoint`, which +/// in local/dev/e2e setups is typically `http://127.0.0.1:`. That URL is +/// useless inside the guest because the guest's loopback interface is its own, +/// not the host's. Inside the guest we need a name that gvproxy will translate +/// into the host's loopback address. +/// +/// We rewrite to `host.containers.internal`, which gvproxy's embedded DNS resolves +/// to the host-loopback IP `192.168.127.254`. gvproxy installs a default NAT entry +/// rewriting that destination to the host's `127.0.0.1` and dialing out from the +/// host process, so any port the host is listening on becomes reachable. The +/// gateway IP `192.168.127.1` does **not** do this — it only listens on gvproxy's +/// own service ports (DNS, DHCP, HTTP API). The guest init script also seeds the +/// hostname in `/etc/hosts` so resolution works even if gvproxy's DNS isn't in +/// resolv.conf (e.g. when DHCP fails). 
+/// +/// Non-loopback URLs are returned unchanged. fn guest_visible_openshell_endpoint(endpoint: &str) -> String { let Ok(mut url) = Url::parse(endpoint) else { return endpoint.to_string(); @@ -1672,7 +1719,7 @@ fn guest_visible_openshell_endpoint(endpoint: &str) -> String { None => false, }; - if should_rewrite && url.set_host(Some(GVPROXY_GATEWAY_IP)).is_ok() { + if should_rewrite && url.set_host(Some(GVPROXY_HOST_LOOPBACK_ALIAS)).is_ok() { return url.to_string(); } @@ -2181,7 +2228,7 @@ mod tests { let env = build_guest_environment(&sandbox, &config, None); assert!(env.contains(&"HOME=/root".to_string())); assert!(env.contains(&format!( - "OPENSHELL_ENDPOINT=http://{GVPROXY_GATEWAY_IP}:8080/" + "OPENSHELL_ENDPOINT=http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080/" ))); assert!(env.contains(&"OPENSHELL_SANDBOX_ID=sandbox-123".to_string())); assert!(env.contains(&format!( @@ -2223,18 +2270,18 @@ mod tests { } #[test] - fn guest_visible_openshell_endpoint_rewrites_loopback_hosts_to_gvproxy_gateway() { + fn guest_visible_openshell_endpoint_rewrites_loopback_hosts_to_gvproxy_host_alias() { assert_eq!( guest_visible_openshell_endpoint("http://127.0.0.1:8080"), - format!("http://{GVPROXY_GATEWAY_IP}:8080/") + format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080/") ); assert_eq!( guest_visible_openshell_endpoint("http://localhost:8080"), - format!("http://{GVPROXY_GATEWAY_IP}:8080/") + format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080/") ); assert_eq!( guest_visible_openshell_endpoint("https://[::1]:8443"), - format!("https://{GVPROXY_GATEWAY_IP}:8443/") + format!("https://{GVPROXY_HOST_LOOPBACK_ALIAS}:8443/") ); } @@ -2247,8 +2294,10 @@ mod tests { format!("http://{OPENSHELL_HOST_GATEWAY_ALIAS}:8080") ); assert_eq!( - guest_visible_openshell_endpoint("http://host.containers.internal:8080"), - "http://host.containers.internal:8080" + guest_visible_openshell_endpoint(&format!( + "http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080" + )), + 
format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080") ); assert_eq!( guest_visible_openshell_endpoint(&format!("http://{GVPROXY_GATEWAY_IP}:8080")), diff --git a/crates/openshell-driver-vm/src/runtime.rs b/crates/openshell-driver-vm/src/runtime.rs index c063da10b..a7c9afcea 100644 --- a/crates/openshell-driver-vm/src/runtime.rs +++ b/crates/openshell-driver-vm/src/runtime.rs @@ -705,13 +705,15 @@ fn run_libkrun_vm(config: &VmLaunchConfig) -> Result<(), String> { // talks to on boot (IPs 192.168.127.1 / .2, defaults for // gvisor-tap-vsock); // * the host-facing gateway identity the guest uses for callbacks: - // the init script seeds `/etc/hosts` with - // `host.openshell.internal` pointing at 192.168.127.1 while - // leaving gvproxy's legacy `host.containers.internal` / - // `host.docker.internal` DNS answers intact, which is how the guest's - // `rewrite_openshell_endpoint_if_needed` probe reaches the host - // gateway when the bare loopback address doesn't resolve from - // inside the VM. + // gvproxy installs a default NAT entry rewriting `192.168.127.254` + // (the subnet's HostIP) to the host's `127.0.0.1`, and serves + // `host.containers.internal` / `host.docker.internal` / + // `host.openshell.internal` in its embedded DNS pointing at that + // same HostIP. The guest init script seeds /etc/hosts with the + // same mapping so the supervisor reaches the host gateway even + // when gvproxy's DNS isn't in resolv.conf. The gateway IP + // (192.168.127.1) is NOT a host-loopback proxy — it only listens + // on its own service ports (DNS:53, DHCP, HTTP API:80). 
// // That network plane is also what the sandbox supervisor's // per-sandbox netns (veth pair + iptables, see diff --git a/crates/openshell-driver-vm/start.sh b/crates/openshell-driver-vm/start.sh deleted file mode 100755 index c81cc22ed..000000000 --- a/crates/openshell-driver-vm/start.sh +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -set -euo pipefail - -# Under sudo, PATH is reset and user-local tools (mise, cargo) disappear. -# Restore the invoking user's tool directories so mise and its shims work. -if [ -n "${SUDO_USER:-}" ]; then - _sudo_home=$(getent passwd "${SUDO_USER}" | cut -d: -f6) - for _p in "${_sudo_home}/.local/bin" "${_sudo_home}/.local/share/mise/shims" "${_sudo_home}/.cargo/bin"; do - [ -d "${_p}" ] && PATH="${_p}:${PATH}" - done - export PATH -fi - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -source "${ROOT}/crates/openshell-vm/pins.env" 2>/dev/null || true -CLI_BIN="${ROOT}/scripts/bin/openshell" -ENV_FILE="${ROOT}/.env" -COMPRESSED_DIR_DEFAULT="${ROOT}/target/vm-runtime-compressed" -COMPRESSED_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${COMPRESSED_DIR_DEFAULT}}" -SERVER_PORT_REQUESTED="${OPENSHELL_SERVER_PORT:-${GATEWAY_PORT:-}}" -SERVER_PORT="${SERVER_PORT_REQUESTED:-}" -STATE_DIR_ROOT="${OPENSHELL_VM_DRIVER_STATE_ROOT:-/tmp}" -VM_HOST_GATEWAY_DEFAULT="${OPENSHELL_VM_HOST_GATEWAY:-host.containers.internal}" -DRIVER_DIR_DEFAULT="${ROOT}/target/debug" -DRIVER_DIR="${OPENSHELL_DRIVER_DIR:-${DRIVER_DIR_DEFAULT}}" - -normalize_name() { - printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g' | sed 's/--*/-/g' | sed 's/^-//;s/-$//' -} - -for arg in "$@"; do - if [ "${arg}" = "--gpu" ]; then - export OPENSHELL_VM_GPU=true - break - fi -done - -has_env_key() { - local key=$1 - [ -f "${ENV_FILE}" ] || return 1 - grep -Eq "^[[:space:]]*(export[[:space:]]+)?${key}=" 
"${ENV_FILE}" -} - -append_env_if_missing() { - local key=$1 - local value=$2 - if has_env_key "${key}"; then - return - fi - if [ -f "${ENV_FILE}" ] && [ -s "${ENV_FILE}" ]; then - if [ "$(tail -c1 "${ENV_FILE}" | wc -l)" -eq 0 ]; then - printf "\n" >>"${ENV_FILE}" - fi - fi - printf "%s=%s\n" "${key}" "${value}" >>"${ENV_FILE}" -} - -upsert_env_key() { - local key=$1 - local value=$2 - local tmp_file - - tmp_file="$(mktemp "${ENV_FILE}.tmp.XXXXXX")" - if [ -f "${ENV_FILE}" ]; then - awk -v key="${key}" -v value="${value}" ' - BEGIN { updated = 0 } - $0 ~ "^[[:space:]]*(export[[:space:]]+)?" key "=" { - if (!updated) { - print key "=" value - updated = 1 - } - next - } - { print } - END { - if (!updated) { - print key "=" value - } - } - ' "${ENV_FILE}" >"${tmp_file}" - else - printf "%s=%s\n" "${key}" "${value}" >"${tmp_file}" - fi - mv "${tmp_file}" "${ENV_FILE}" -} - -normalize_bool() { - case "${1,,}" in - 1|true|yes|on) echo "true" ;; - 0|false|no|off) echo "false" ;; - *) - echo "invalid boolean value '$1' (expected true/false, 1/0, yes/no, on/off)" >&2 - exit 1 - ;; - esac -} - -port_is_in_use() { - local port=$1 - if command -v lsof >/dev/null 2>&1; then - lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1 - return $? - fi - - if command -v nc >/dev/null 2>&1; then - nc -z 127.0.0.1 "${port}" >/dev/null 2>&1 - return $? - fi - - (echo >/dev/tcp/127.0.0.1/"${port}") >/dev/null 2>&1 -} - -pick_random_port() { - local lower=20000 - local upper=60999 - local attempts=256 - local port - - for _ in $(seq 1 "${attempts}"); do - port=$((RANDOM % (upper - lower + 1) + lower)) - if ! port_is_in_use "${port}"; then - echo "${port}" - return 0 - fi - done - - echo "ERROR: could not find a free port after ${attempts} attempts." >&2 - return 1 -} - -check_supervisor_cross_toolchain() { - # The sandbox supervisor inside the guest is always Linux. 
On non-Linux - # hosts (macOS) and on Linux hosts with a different arch than the guest, - # we cross-compile via cargo-zigbuild and need the matching rustup target. - local host_os host_arch guest_arch rust_target - host_os="$(uname -s)" - host_arch="$(uname -m)" - guest_arch="${GUEST_ARCH:-${host_arch}}" - case "${guest_arch}" in - arm64|aarch64) rust_target="aarch64-unknown-linux-gnu" ;; - x86_64|amd64) rust_target="x86_64-unknown-linux-gnu" ;; - *) return 0 ;; - esac - if [ "${host_os}" = "Linux" ] && [ "${host_arch}" = "${guest_arch}" ]; then - return 0 - fi - local missing=0 - if ! command -v cargo-zigbuild >/dev/null 2>&1; then - echo "ERROR: cargo-zigbuild not found (required to cross-compile the guest supervisor)." >&2 - echo " Install: cargo install --locked cargo-zigbuild && brew install zig" >&2 - missing=1 - fi - if ! rustup target list --installed 2>/dev/null | grep -qx "${rust_target}"; then - echo "ERROR: Rust target '${rust_target}' not installed." >&2 - echo " Install: rustup target add ${rust_target}" >&2 - missing=1 - fi - if [ "${missing}" -ne 0 ]; then - exit 1 - fi -} - -build_gateway_and_driver() { - local rustc_wrapper_mode="${1:-default}" - local cargo_prefix=() - - if [ "${rustc_wrapper_mode}" = "without-rustc-wrapper" ]; then - cargo_prefix=(env -u RUSTC_WRAPPER) - fi - - "${cargo_prefix[@]}" cargo build -p openshell-server -p openshell-driver-vm -} - -if [ -n "${SERVER_PORT_REQUESTED}" ]; then - if port_is_in_use "${SERVER_PORT}"; then - echo "ERROR: requested gateway port ${SERVER_PORT} is already in use." 
>&2 - echo " Update .env GATEWAY_PORT or override it for one run:" >&2 - echo " OPENSHELL_SERVER_PORT= mise run gateway:vm" >&2 - exit 1 - fi -else - SERVER_PORT="$(pick_random_port)" - append_env_if_missing "GATEWAY_PORT" "${SERVER_PORT}" -fi - -GATEWAY_NAME_DEFAULT="$(basename "${ROOT}")" -GATEWAY_NAME="${OPENSHELL_VM_GATEWAY_NAME:-${GATEWAY_NAME_DEFAULT}}" -if [ -z "${GATEWAY_NAME}" ]; then - GATEWAY_NAME="openshell" -fi - -# Keep the driver socket path under AF_UNIX SUN_LEN on macOS. -STATE_LABEL_RAW="${OPENSHELL_VM_INSTANCE:-$(normalize_name "${GATEWAY_NAME}")}" -STATE_LABEL="$(printf '%s' "${STATE_LABEL_RAW}" | tr -cs '[:alnum:]._-' '-')" -if [ -z "${STATE_LABEL}" ]; then - STATE_LABEL="gateway" -fi -STATE_DIR_DEFAULT="${STATE_DIR_ROOT}/openshell-vm-driver-dev-${USER:-user}-${STATE_LABEL}" -STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${STATE_DIR_DEFAULT}}" -DB_PATH_DEFAULT="${STATE_DIR}/openshell.db" -LOCAL_GATEWAY_ENDPOINT_DEFAULT="http://127.0.0.1:${SERVER_PORT}" -LOCAL_GATEWAY_ENDPOINT="${OPENSHELL_VM_LOCAL_GATEWAY_ENDPOINT:-${LOCAL_GATEWAY_ENDPOINT_DEFAULT}}" - -export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${COMPRESSED_DIR}" -export OPENSHELL_GATEWAY="${GATEWAY_NAME}" - -upsert_env_key "OPENSHELL_GATEWAY" "${GATEWAY_NAME}" - -mkdir -p "${STATE_DIR}" - -if [ ! -d "${COMPRESSED_DIR}" ] || ! find "${COMPRESSED_DIR}" -maxdepth 1 -name 'libkrun*.zst' | grep -q . || [ ! -f "${COMPRESSED_DIR}/gvproxy.zst" ]; then - echo "==> Preparing embedded VM runtime" - mise run vm:setup -fi - -if [ ! -f "${COMPRESSED_DIR}/openshell-sandbox.zst" ]; then - check_supervisor_cross_toolchain - echo "==> Building bundled VM supervisor" - mise run vm:supervisor -fi - -echo "==> Building gateway and VM compute driver" -if ! build_gateway_and_driver; then - if [ -n "${RUSTC_WRAPPER:-}" ]; then - echo "WARNING: gateway/driver build failed through RUSTC_WRAPPER=${RUSTC_WRAPPER}; retrying without RUSTC_WRAPPER." 
>&2 - build_gateway_and_driver without-rustc-wrapper - else - exit 1 - fi -fi - -if [ "$(uname -s)" = "Darwin" ]; then - echo "==> Codesigning VM compute driver" - codesign \ - --entitlements "${ROOT}/crates/openshell-driver-vm/entitlements.plist" \ - --force \ - -s - \ - "${ROOT}/target/debug/openshell-driver-vm" -fi - -export OPENSHELL_DISABLE_TLS="$(normalize_bool "${OPENSHELL_DISABLE_TLS:-true}")" -export OPENSHELL_DB_URL="${OPENSHELL_DB_URL:-sqlite:${DB_PATH_DEFAULT}}" -export OPENSHELL_DRIVERS="${OPENSHELL_DRIVERS:-vm}" -export OPENSHELL_DRIVER_DIR="${DRIVER_DIR}" -export OPENSHELL_SERVER_PORT="${SERVER_PORT}" -export OPENSHELL_GRPC_ENDPOINT="${OPENSHELL_GRPC_ENDPOINT:-http://${VM_HOST_GATEWAY_DEFAULT}:${SERVER_PORT}}" -export OPENSHELL_SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-${COMMUNITY_SANDBOX_IMAGE:-}}" -export OPENSHELL_SSH_GATEWAY_HOST="${OPENSHELL_SSH_GATEWAY_HOST:-127.0.0.1}" -export OPENSHELL_SSH_GATEWAY_PORT="${OPENSHELL_SSH_GATEWAY_PORT:-${SERVER_PORT}}" -export OPENSHELL_SSH_HANDSHAKE_SECRET="${OPENSHELL_SSH_HANDSHAKE_SECRET:-dev-vm-driver-secret}" -export OPENSHELL_VM_DRIVER_STATE_DIR="${STATE_DIR}" - -# Resolve the VM runtime directory (contains vmlinux, virtiofsd, etc.) -# so the child --internal-run-vm process can find it under sudo. -if [ -z "${OPENSHELL_VM_RUNTIME_DIR:-}" ]; then - _candidate="${HOME}/.local/share/openshell/vm-runtime/0.0.0" - if [ -n "${SUDO_USER:-}" ]; then - _sudo_home=$(getent passwd "${SUDO_USER}" | cut -d: -f6) - _candidate="${_sudo_home}/.local/share/openshell/vm-runtime/0.0.0" - fi - if [ -f "${_candidate}/vmlinux" ]; then - export OPENSHELL_VM_RUNTIME_DIR="${_candidate}" - fi -fi - -# Write gateway metadata to the invoking user's config dir, even under sudo. -# When running under sudo, $HOME points at /root and XDG_CONFIG_HOME may also -# be inherited from the root env; fall back to SUDO_USER's home directory so -# repo-local `scripts/bin/openshell` reads the same config. 
-if [ -n "${SUDO_USER:-}" ]; then - _sudo_home=$(getent passwd "${SUDO_USER}" | cut -d: -f6) - GATEWAY_CONFIG_BASE="${_sudo_home}/.config" -else - GATEWAY_CONFIG_BASE="${XDG_CONFIG_HOME:-${HOME}/.config}" -fi -GATEWAY_METADATA_DIR="${GATEWAY_CONFIG_BASE}/openshell/gateways/${GATEWAY_NAME}" -mkdir -p "${GATEWAY_METADATA_DIR}" -cat >"${GATEWAY_METADATA_DIR}/metadata.json" </dev/null || true -fi - -echo "==> Gateway config" -echo " Name: ${GATEWAY_NAME}" -echo " Endpoint: ${LOCAL_GATEWAY_ENDPOINT}" -echo " .env: OPENSHELL_GATEWAY=${GATEWAY_NAME}" -echo " .env: GATEWAY_PORT=${SERVER_PORT}" -echo " Driver: ${OPENSHELL_DRIVER_DIR}/openshell-driver-vm" -echo " Image: ${OPENSHELL_SANDBOX_IMAGE}" -echo " Status: ${CLI_BIN} status" -echo " Create: ${CLI_BIN} sandbox create --name vm-test --from ubuntu:24.04" - -echo "==> Starting OpenShell server with VM compute driver" -exec "${ROOT}/target/debug/openshell-gateway" diff --git a/crates/openshell-server/src/compute/vm.rs b/crates/openshell-server/src/compute/vm.rs index e5d2880ec..23b728060 100644 --- a/crates/openshell-server/src/compute/vm.rs +++ b/crates/openshell-server/src/compute/vm.rs @@ -310,9 +310,14 @@ pub async fn spawn( if !vm_config.default_image.trim().is_empty() { command.arg("--default-image").arg(&vm_config.default_image); } - command - .arg("--ssh-handshake-secret") - .arg(&config.ssh_handshake_secret); + // Only forward the handshake secret when one is configured. The VM + // driver does not consume it, but accepts it for parity with the + // Kubernetes/Podman drivers; passing an empty value is noise. 
+ if !config.ssh_handshake_secret.is_empty() { + command + .arg("--ssh-handshake-secret") + .arg(&config.ssh_handshake_secret); + } command .arg("--ssh-handshake-skew-secs") .arg(config.ssh_handshake_skew_secs.to_string()); diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 95c18fdda..6c59e9822 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -148,12 +148,10 @@ pub async fn run_server( if database_url.is_empty() { return Err(Error::config("database_url is required")); } - let driver = configured_compute_driver(&config)?; - if config.ssh_handshake_secret.is_empty() && driver != ComputeDriverKind::Docker { - return Err(Error::config( - "ssh_handshake_secret is required. Set --ssh-handshake-secret or OPENSHELL_SSH_HANDSHAKE_SECRET", - )); - } + // The ssh_handshake_secret check is driver-scoped: the Kubernetes and + // Podman drivers inject the secret into sandbox env, while the VM and + // Docker drivers do not consume it. The check is enforced per-driver + // in `build_compute_runtime`. let store = Arc::new(Store::connect(database_url).await?); @@ -368,32 +366,35 @@ async fn build_compute_runtime( info!(driver = %driver, "Using compute driver"); match driver { - ComputeDriverKind::Kubernetes => ComputeRuntime::new_kubernetes( - KubernetesComputeConfig { - namespace: config.sandbox_namespace.clone(), - default_image: config.sandbox_image.clone(), - image_pull_policy: config.sandbox_image_pull_policy.clone(), - grpc_endpoint: config.grpc_endpoint.clone(), - // Filesystem path to the supervisor's Unix-socket SSH daemon. - // The path lives in a root-only directory so only the - // supervisor can connect; the gateway reaches it through the - // RelayStream bridge, not directly. Override via - // `sandbox_ssh_socket_path` in the config for deployments - // where multiple supervisors share a filesystem. 
- ssh_socket_path: config.sandbox_ssh_socket_path.clone(), - ssh_handshake_secret: config.ssh_handshake_secret.clone(), - ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, - client_tls_secret_name: config.client_tls_secret_name.clone(), - host_gateway_ip: config.host_gateway_ip.clone(), - }, - store, - sandbox_index, - sandbox_watch_bus, - tracing_log_bus, - supervisor_sessions.clone(), - ) - .await - .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))), + ComputeDriverKind::Kubernetes => { + require_ssh_handshake_secret(config)?; + ComputeRuntime::new_kubernetes( + KubernetesComputeConfig { + namespace: config.sandbox_namespace.clone(), + default_image: config.sandbox_image.clone(), + image_pull_policy: config.sandbox_image_pull_policy.clone(), + grpc_endpoint: config.grpc_endpoint.clone(), + // Filesystem path to the supervisor's Unix-socket SSH daemon. + // The path lives in a root-only directory so only the + // supervisor can connect; the gateway reaches it through + // the RelayStream bridge, not directly. Override via + // `sandbox_ssh_socket_path` in the config for deployments + // where multiple supervisors share a filesystem. 
+ ssh_socket_path: config.sandbox_ssh_socket_path.clone(), + ssh_handshake_secret: config.ssh_handshake_secret.clone(), + ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, + client_tls_secret_name: config.client_tls_secret_name.clone(), + host_gateway_ip: config.host_gateway_ip.clone(), + }, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions.clone(), + ) + .await + .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) + } ComputeDriverKind::Docker => ComputeRuntime::new_docker( config.clone(), docker_config.clone(), @@ -420,6 +421,7 @@ async fn build_compute_runtime( .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) } ComputeDriverKind::Podman => { + require_ssh_handshake_secret(config)?; let socket_path = std::env::var("OPENSHELL_PODMAN_SOCKET") .ok() .filter(|s| !s.is_empty()) @@ -471,6 +473,15 @@ async fn build_compute_runtime( } } +fn require_ssh_handshake_secret(config: &Config) -> Result<()> { + if config.ssh_handshake_secret.is_empty() { + return Err(Error::config( + "ssh_handshake_secret is required for this driver. Set --ssh-handshake-secret or OPENSHELL_SSH_HANDSHAKE_SECRET", + )); + } + Ok(()) +} + fn configured_compute_driver(config: &Config) -> Result { match config.compute_drivers.as_slice() { [] => Err(Error::config( diff --git a/e2e/rust/e2e-vm.sh b/e2e/rust/e2e-vm.sh index 5990d8db6..551f9b41e 100755 --- a/e2e/rust/e2e-vm.sh +++ b/e2e/rust/e2e-vm.sh @@ -167,13 +167,20 @@ echo "==> Starting openshell-gateway on 127.0.0.1:${HOST_PORT} (state: ${RUN_STA # `~/.local/libexec/openshell/openshell-driver-vm` when present # (install-vm.sh installs there), which silently shadows development # builds — a subtle source of stale-binary bugs in e2e runs. +# --grpc-endpoint is the URL the VM driver passes into each guest as +# OPENSHELL_ENDPOINT. The supervisor inside the VM dials this address. 
+# Use `host.containers.internal` rather than `127.0.0.1` so gvproxy's +# host-loopback proxy carries the connection — gvproxy's bare gateway IP +# (192.168.127.1) does NOT forward arbitrary host ports. The driver also +# rewrites loopback URLs to this hostname as a safety net, so this matches +# what the guest will actually see and aligns with `tasks/scripts/gateway-vm.sh`. "${GATEWAY_BIN}" \ --drivers vm \ --disable-tls \ --disable-gateway-auth \ --db-url 'sqlite::memory:' \ --port "${HOST_PORT}" \ - --grpc-endpoint "http://127.0.0.1:${HOST_PORT}" \ + --grpc-endpoint "http://host.containers.internal:${HOST_PORT}" \ --ssh-handshake-secret "${SSH_HANDSHAKE_SECRET}" \ --driver-dir "${ROOT}/target/debug" \ --vm-driver-state-dir "${RUN_STATE_DIR}" \ diff --git a/tasks/gateway.toml b/tasks/gateway.toml index 3f7a684d2..1df07cb95 100644 --- a/tasks/gateway.toml +++ b/tasks/gateway.toml @@ -6,3 +6,7 @@ ["gateway:docker"] description = "Run a standalone gateway with the bundled Docker compute driver" run = "bash tasks/scripts/gateway-docker.sh" + +["gateway:vm"] +description = "Run a standalone gateway with the bundled VM compute driver" +run = "bash tasks/scripts/gateway-vm.sh" diff --git a/tasks/scripts/gateway-vm.sh b/tasks/scripts/gateway-vm.sh new file mode 100755 index 000000000..f0623a46c --- /dev/null +++ b/tasks/scripts/gateway-vm.sh @@ -0,0 +1,225 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Start a standalone openshell-gateway backed by the VM compute driver +# (openshell-driver-vm) for local manual testing. 
+# +# Defaults: +# - Plaintext HTTP on 127.0.0.1:18081 +# - Dedicated CLI gateway "vm-dev" +# - Persistent gateway state (SQLite DB) under .cache/gateway-vm +# - Per-sandbox VM driver state (rootfs + compute-driver.sock) under +# /tmp/openshell-vm-driver-- so the AF_UNIX socket +# path stays under macOS SUN_LEN +# +# Common overrides: +# OPENSHELL_SERVER_PORT=18091 mise run gateway:vm +# OPENSHELL_VM_GATEWAY_NAME=my-vm-gateway mise run gateway:vm +# OPENSHELL_SANDBOX_NAMESPACE=my-ns mise run gateway:vm +# OPENSHELL_SANDBOX_IMAGE=ghcr.io/... mise run gateway:vm +# +# After the gateway is running, point the CLI at it with either: +# openshell --gateway vm-dev +# openshell gateway select vm-dev # then plain `openshell ` + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +PORT="${OPENSHELL_SERVER_PORT:-18081}" +GATEWAY_NAME="${OPENSHELL_VM_GATEWAY_NAME:-vm-dev}" +STATE_DIR="${OPENSHELL_VM_GATEWAY_STATE_DIR:-${ROOT}/.cache/gateway-vm}" +SANDBOX_NAMESPACE="${OPENSHELL_SANDBOX_NAMESPACE:-vm-dev}" +SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-${COMMUNITY_SANDBOX_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/base:latest}}" +SANDBOX_IMAGE_PULL_POLICY="${OPENSHELL_SANDBOX_IMAGE_PULL_POLICY:-IfNotPresent}" +LOG_LEVEL="${OPENSHELL_LOG_LEVEL:-info}" +GATEWAY_BIN="${ROOT}/target/debug/openshell-gateway" +DRIVER_DIR_DEFAULT="${ROOT}/target/debug" +DRIVER_DIR="${OPENSHELL_DRIVER_DIR:-${DRIVER_DIR_DEFAULT}}" +COMPRESSED_DIR_DEFAULT="${ROOT}/target/vm-runtime-compressed" +COMPRESSED_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${COMPRESSED_DIR_DEFAULT}}" +VM_HOST_GATEWAY_DEFAULT="${OPENSHELL_VM_HOST_GATEWAY:-host.containers.internal}" +GRPC_ENDPOINT="${OPENSHELL_GRPC_ENDPOINT:-http://${VM_HOST_GATEWAY_DEFAULT}:${PORT}}" + +normalize_arch() { + case "$1" in + x86_64|amd64) echo "amd64" ;; + aarch64|arm64) echo "arm64" ;; + *) echo "$1" ;; + esac +} + +normalize_bool() { + case "${1,,}" in + 1|true|yes|on) echo "true" ;; + 0|false|no|off) echo "false" 
;; + *) + echo "ERROR: invalid boolean value '$1' (expected true/false, 1/0, yes/no, on/off)" >&2 + exit 2 + ;; + esac +} + +port_is_in_use() { + local port=$1 + if command -v lsof >/dev/null 2>&1; then + lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1 + return $? + fi + if command -v nc >/dev/null 2>&1; then + nc -z 127.0.0.1 "${port}" >/dev/null 2>&1 + return $? + fi + (echo >/dev/tcp/127.0.0.1/"${port}") >/dev/null 2>&1 +} + +register_gateway_metadata() { + local name=$1 + local endpoint=$2 + local port=$3 + local config_home gateway_dir + + config_home="${XDG_CONFIG_HOME:-${HOME}/.config}" + gateway_dir="${config_home}/openshell/gateways/${name}" + + mkdir -p "${gateway_dir}" + cat >"${gateway_dir}/metadata.json" </dev/null 2>&1; then + echo "ERROR: cargo-zigbuild not found (required to cross-compile the guest supervisor)." >&2 + echo " Install: cargo install --locked cargo-zigbuild && brew install zig" >&2 + missing=1 + fi + if ! rustup target list --installed 2>/dev/null | grep -qx "${rust_target}"; then + echo "ERROR: Rust target '${rust_target}' not installed." >&2 + echo " Install: rustup target add ${rust_target}" >&2 + missing=1 + fi + if [ "${missing}" -ne 0 ]; then + exit 1 + fi +} + +if [[ ! "${GATEWAY_NAME}" =~ ^[A-Za-z0-9._-]+$ ]]; then + echo "ERROR: OPENSHELL_VM_GATEWAY_NAME must contain only letters, numbers, dots, underscores, or dashes" >&2 + exit 2 +fi + +if port_is_in_use "${PORT}"; then + echo "ERROR: port ${PORT} is already in use; free it or set OPENSHELL_SERVER_PORT" >&2 + exit 2 +fi + +# AF_UNIX SUN_LEN on macOS is 104 bytes; the VM driver places +# `compute-driver.sock` directly under VM_DRIVER_STATE_DIR, so anchor it +# under /tmp instead of `${ROOT}/.cache` (which is typically too long on +# macOS dev boxes with worktree paths). 
+STATE_LABEL="$(printf '%s' "${GATEWAY_NAME}" | tr -cs '[:alnum:]._-' '-')" +if [ -z "${STATE_LABEL}" ]; then + STATE_LABEL="vm-dev" +fi +VM_DRIVER_STATE_DIR_DEFAULT="${OPENSHELL_VM_DRIVER_STATE_ROOT:-/tmp}/openshell-vm-driver-${USER:-user}-${STATE_LABEL}" +VM_DRIVER_STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${VM_DRIVER_STATE_DIR_DEFAULT}}" + +DISABLE_TLS="$(normalize_bool "${OPENSHELL_DISABLE_TLS:-true}")" + +# Build prerequisites: VM runtime artifacts + bundled supervisor. +if [ ! -d "${COMPRESSED_DIR}" ] \ + || ! find "${COMPRESSED_DIR}" -maxdepth 1 -name 'libkrun*.zst' | grep -q . \ + || [ ! -f "${COMPRESSED_DIR}/gvproxy.zst" ]; then + echo "==> Preparing embedded VM runtime (mise run vm:setup)" + mise run vm:setup +fi + +if [ ! -f "${COMPRESSED_DIR}/openshell-sandbox.zst" ]; then + check_supervisor_cross_toolchain + echo "==> Building bundled VM supervisor (mise run vm:supervisor)" + mise run vm:supervisor +fi + +export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${COMPRESSED_DIR}" + +CARGO_BUILD_JOBS_ARG=() +if [[ -n "${CARGO_BUILD_JOBS:-}" ]]; then + CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}") +fi + +echo "==> Building openshell-gateway and openshell-driver-vm" +cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ + -p openshell-server -p openshell-driver-vm + +if [ "$(uname -s)" = "Darwin" ]; then + echo "==> Codesigning openshell-driver-vm (Hypervisor entitlement)" + codesign \ + --entitlements "${ROOT}/crates/openshell-driver-vm/entitlements.plist" \ + --force \ + -s - \ + "${DRIVER_DIR}/openshell-driver-vm" +fi + +mkdir -p "${STATE_DIR}" +mkdir -p "${VM_DRIVER_STATE_DIR}" + +GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" +register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" + +echo "Starting standalone VM gateway..." 
+echo " gateway: ${GATEWAY_NAME}" +echo " endpoint: ${GATEWAY_ENDPOINT}" +echo " namespace: ${SANDBOX_NAMESPACE}" +echo " state dir: ${STATE_DIR}" +echo " driver: ${DRIVER_DIR}/openshell-driver-vm" +echo " driver dir: ${VM_DRIVER_STATE_DIR}" +echo " image: ${SANDBOX_IMAGE}" +echo +echo "Point the CLI at this gateway with one of:" +echo " openshell --gateway ${GATEWAY_NAME} status" +echo " openshell gateway select ${GATEWAY_NAME}" +echo + +GATEWAY_ARGS=( + --port "${PORT}" + --log-level "${LOG_LEVEL}" + --drivers vm + --db-url "sqlite:${STATE_DIR}/gateway.db?mode=rwc" + --sandbox-namespace "${SANDBOX_NAMESPACE}" + --sandbox-image "${SANDBOX_IMAGE}" + --sandbox-image-pull-policy "${SANDBOX_IMAGE_PULL_POLICY}" + --grpc-endpoint "${GRPC_ENDPOINT}" + --driver-dir "${DRIVER_DIR}" + --vm-driver-state-dir "${VM_DRIVER_STATE_DIR}" +) + +if [ "${DISABLE_TLS}" = "true" ]; then + GATEWAY_ARGS+=(--disable-tls) +fi + +exec "${GATEWAY_BIN}" "${GATEWAY_ARGS[@]}" diff --git a/tasks/vm.toml b/tasks/vm.toml index 410416000..e9eb22561 100644 --- a/tasks/vm.toml +++ b/tasks/vm.toml @@ -7,20 +7,18 @@ # mise run vm:setup # one-time: download pre-built runtime (~30s) # mise run vm:supervisor # build the bundled sandbox supervisor # mise run gateway:vm # start openshell-gateway with the VM driver +# # (defined in tasks/gateway.toml) # mise run vm # build + run the standalone openshell-vm microVM # mise run vm:clean # wipe everything and start over # -# See crates/openshell-driver-vm/README.md for the `gateway:vm` flow and +# See tasks/gateway.toml for `gateway:vm`, +# crates/openshell-driver-vm/README.md for the VM driver workflow, and # crates/openshell-vm/README.md for the standalone microVM path. 
# ═══════════════════════════════════════════════════════════════════════════ # Main Commands # ═══════════════════════════════════════════════════════════════════════════ -["gateway:vm"] -description = "Build openshell-gateway + openshell-driver-vm and start the gateway with the VM driver (pass -- --gpu for GPU support)" -run = "crates/openshell-driver-vm/start.sh" - [vm] description = "Build and run the standalone openshell-vm microVM" depends = ["build:docker:gateway"] From e73692991106d8ac288a38d42de808d345907d18 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 29 Apr 2026 14:55:03 -0700 Subject: [PATCH 05/11] wip --- crates/openshell-cli/src/run.rs | 69 +++++ crates/openshell-driver-vm/src/driver.rs | 318 +++++++++++++++++++---- tasks/gateway.toml | 20 ++ tasks/scripts/gateway-docker.sh | 31 ++- tasks/scripts/gateway-start.sh | 34 +++ tasks/scripts/gateway-vm.sh | 31 ++- 6 files changed, 436 insertions(+), 67 deletions(-) create mode 100755 tasks/scripts/gateway-start.sh diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index cd651eaa8..8d3568885 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -194,6 +194,45 @@ enum KubeEventReason { Started, } +/// Decide whether a server/driver log line should be surfaced during +/// `sandbox create` provisioning. +/// +/// Defaults to a curated allowlist so the spinner stays focused on +/// meaningful progress. Set `OPENSHELL_PROVISION_VERBOSE=1` to surface +/// every log line (helpful when debugging stuck provisioning, e.g. on +/// the experimental VM gateway). +fn should_show_provisioning_log(line: &openshell_core::proto::SandboxLogLine) -> bool { + if std::env::var("OPENSHELL_PROVISION_VERBOSE") + .map(|v| !v.is_empty() && v != "0" && v.to_ascii_lowercase() != "false") + .unwrap_or(false) + { + return true; + } + + // Always surface warnings and errors during provisioning. 
+ let level = line.level.to_ascii_lowercase(); + if matches!(level.as_str(), "warn" | "warning" | "error") { + return true; + } + + // Allowlisted substrings for info-level progress lines emitted by + // the server compute layer and bundled drivers (VM / Docker). Keep + // this short — anything not matching is suppressed by default. + const ALLOWLIST: &[&str] = &[ + "Sandbox phase changed", + "Pulling image", + "Pulled image", + "Extracting", + "Preparing rootfs", + "Booting VM", + "Starting VM", + "Starting sandbox", + "Sandbox ready", + "Supervisor connected", + ]; + ALLOWLIST.iter().any(|needle| line.message.contains(needle)) +} + /// Map a Kubernetes event reason string to an enum. fn parse_kube_event_reason(reason: &str) -> Option { match reason { @@ -2302,6 +2341,36 @@ pub async fn sandbox_create( if !saw_gateway_ready && line.message.contains("listening") { saw_gateway_ready = true; } + + // Surface log lines as progress so users aren't staring at a + // silent spinner while non-Kubernetes drivers (VM, Docker) do + // their work. Drivers/server tracing with a `sandbox_id` + // field flows through here as Log payloads. + // + // The default filter keeps output focused on user-relevant + // progress: warn/error always, plus a curated allowlist of + // info messages. Set OPENSHELL_PROVISION_VERBOSE=1 to see + // every log line during provisioning. + if !line.message.is_empty() && should_show_provisioning_log(&line) { + if let Some(d) = display.as_mut() { + // Interactive: tuck the message under the spinner + // as detail so the checklist stays clean. 
+ d.set_active_detail(&line.message); + } else { + let ts = format_timestamp(provision_start.elapsed()); + let level = if line.level.is_empty() { + "INFO".to_string() + } else { + line.level.to_uppercase() + }; + eprintln!( + " {} {} {}", + ts.dimmed(), + level.dimmed(), + line.message, + ); + } + } } Some(openshell_core::proto::sandbox_stream_event::Payload::Event(ev)) => { // Map Kubernetes events to provisioning steps. diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 6e2b25656..5325152c8 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -46,6 +46,7 @@ use tokio::process::{Child, Command}; use tokio::sync::{Mutex, broadcast, mpsc}; use tokio_stream::wrappers::ReceiverStream; use tonic::{Request, Response, Status}; +use tracing::{info, warn}; use url::{Host, Url}; const DRIVER_NAME: &str = "openshell-driver-vm"; @@ -345,6 +346,11 @@ impl VmDriver { // gRPC API surface; boxing here would diverge from every other handler. 
#[allow(clippy::result_large_err)] pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result { + info!( + sandbox_id = %sandbox.id, + sandbox_name = %sandbox.name, + "vm driver: create_sandbox received" + ); validate_vm_sandbox(sandbox, self.config.gpu_enabled)?; if self.registry.lock().await.contains_key(&sandbox.id) { @@ -362,6 +368,12 @@ impl VmDriver { "vm sandboxes require template.image or a configured default sandbox image", ) })?; + info!( + sandbox_id = %sandbox.id, + image_ref = %image_ref, + state_dir = %state_dir.display(), + "vm driver: resolved image ref, preparing rootfs" + ); tokio::fs::create_dir_all(&state_dir) .await @@ -371,9 +383,34 @@ impl VmDriver { .config .tls_paths() .map_err(Status::failed_precondition)?; - let image_identity = match self.prepare_runtime_rootfs(&image_ref, &rootfs).await { - Ok(image_identity) => image_identity, + // Mirror the K8s `Scheduled` event so the CLI can complete the + // "Requesting sandbox" step and switch the spinner over to the + // image-pull phase before we block on the registry. 
+ self.publish_platform_event( + sandbox.id.clone(), + platform_event( + "vm", + "Normal", + "Scheduled", + format!("Sandbox accepted by vm driver to image \"{image_ref}\""), + ), + ); + + let image_identity = match self.prepare_runtime_rootfs(&sandbox.id, &image_ref, &rootfs).await { + Ok(image_identity) => { + info!( + sandbox_id = %sandbox.id, + image_identity = %image_identity, + "vm driver: rootfs prepared" + ); + image_identity + } Err(err) => { + warn!( + sandbox_id = %sandbox.id, + error = %err.message(), + "vm driver: rootfs preparation failed" + ); let _ = tokio::fs::remove_dir_all(&state_dir).await; return Err(err); } @@ -507,9 +544,20 @@ impl VmDriver { command.arg("--vm-env").arg(env); } + info!( + sandbox_id = %sandbox.id, + launcher = %self.launcher_bin.display(), + console_output = %console_output.display(), + "vm driver: spawning VM launcher" + ); let child = match command.spawn() { Ok(child) => child, Err(err) => { + warn!( + sandbox_id = %sandbox.id, + error = %err, + "vm driver: launcher spawn failed" + ); if gpu_bdf.is_some() { self.release_gpu_and_subnet(&sandbox.id); } @@ -520,6 +568,23 @@ impl VmDriver { ))); } }; + info!( + sandbox_id = %sandbox.id, + launcher_pid = child.id().unwrap_or(0), + "vm driver: launcher spawned" + ); + // Mirror the K8s `Started` event so the CLI can complete the + // "Starting sandbox" step. The supervisor-ready transition still + // promotes the sandbox to `Ready` separately. 
+ self.publish_platform_event( + sandbox.id.clone(), + platform_event( + "vm", + "Normal", + "Started", + "Started VM launcher".to_string(), + ), + ); let snapshot = sandbox_snapshot(sandbox, provisioning_condition(), false); let process = Arc::new(Mutex::new(VmProcess { child, @@ -665,10 +730,13 @@ impl VmDriver { async fn prepare_runtime_rootfs( &self, + sandbox_id: &str, image_ref: &str, rootfs: &Path, ) -> Result { - let image_identity = self.ensure_cached_image_rootfs_archive(image_ref).await?; + let image_identity = self + .ensure_cached_image_rootfs_archive(sandbox_id, image_ref) + .await?; let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); let rootfs_dest = rootfs.to_path_buf(); tokio::task::spawn_blocking(move || extract_rootfs_archive_to(&archive_path, &rootfs_dest)) @@ -688,16 +756,22 @@ impl VmDriver { }) } - async fn ensure_cached_image_rootfs_archive(&self, image_ref: &str) -> Result { + async fn ensure_cached_image_rootfs_archive( + &self, + sandbox_id: &str, + image_ref: &str, + ) -> Result { if let Some(rootfs_tar_path) = decode_rootfs_tar_image_ref(image_ref) { return self .ensure_cached_rootfs_tar_image_rootfs_archive(image_ref, &rootfs_tar_path) .await; } + info!(image_ref = %image_ref, "vm driver: ensuring cached image rootfs archive (registry)"); let reference = parse_registry_reference(image_ref)?; let client = registry_client(); let auth = registry_auth(image_ref)?; + info!(image_ref = %image_ref, "vm driver: authenticating with registry"); client .auth(&reference, &auth, RegistryOperation::Pull) .await @@ -706,6 +780,7 @@ impl VmDriver { "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}" )) })?; + info!(image_ref = %image_ref, "vm driver: fetching manifest digest"); let image_identity = client .fetch_manifest_digest(&reference, &auth) .await @@ -714,18 +789,59 @@ impl VmDriver { "failed to resolve vm sandbox image '{image_ref}': {err}" )) })?; + info!( + image_ref = 
%image_ref, + image_identity = %image_identity, + "vm driver: manifest digest resolved" + ); let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + // Mirror the K8s `Pulling` event so the CLI flips to the + // image-pull spinner with the image name as detail. We emit it + // for cache hits too and immediately follow with `Pulled` so the + // spinner step still advances cleanly. + self.publish_platform_event( + sandbox_id.to_string(), + platform_event( + "vm", + "Normal", + "Pulling", + format!("Pulling image \"{image_ref}\""), + ), + ); + if tokio::fs::metadata(&archive_path).await.is_ok() { + info!( + image_identity = %image_identity, + archive_path = %archive_path.display(), + "vm driver: image rootfs archive cache hit (no build needed)" + ); + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; return Ok(image_identity); } + info!( + image_identity = %image_identity, + "vm driver: image rootfs archive cache miss, acquiring build lock" + ); let _cache_guard = self.image_cache_lock.lock().await; + info!( + image_identity = %image_identity, + "vm driver: build lock acquired" + ); if tokio::fs::metadata(&archive_path).await.is_ok() { + info!( + image_identity = %image_identity, + "vm driver: image rootfs archive cache hit after lock (built by another task)" + ); + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; return Ok(image_identity); } self.build_cached_registry_image_rootfs_archive( + sandbox_id, &client, &reference, &auth, @@ -733,6 +849,8 @@ impl VmDriver { &image_identity, ) .await?; + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; Ok(image_identity) } @@ -848,6 +966,7 @@ impl VmDriver { async fn build_cached_registry_image_rootfs_archive( &self, + sandbox_id: &str, client: &OciClient, reference: &Reference, auth: &RegistryAuth, @@ -882,19 +1001,35 @@ impl VmDriver { Status::internal(format!("create image cache staging dir failed: {err}")) })?; - if 
let Err(err) = pull_registry_image_rootfs( - client, - reference, - auth, - image_ref, - &staging_dir, - &prepared_rootfs, - ) - .await + info!( + image_ref = %image_ref, + staging_dir = %staging_dir.display(), + "vm driver: pulling registry image layers" + ); + if let Err(err) = self + .pull_registry_image_rootfs( + sandbox_id, + client, + reference, + auth, + image_ref, + &staging_dir, + &prepared_rootfs, + ) + .await { + warn!( + image_ref = %image_ref, + error = %err.message(), + "vm driver: pull_registry_image_rootfs failed" + ); let _ = tokio::fs::remove_dir_all(&staging_dir).await; return Err(err); } + info!( + image_ref = %image_ref, + "vm driver: image layers pulled, preparing rootfs archive" + ); let image_ref_owned = image_ref.to_string(); let image_identity_owned = image_identity.to_string(); @@ -917,11 +1052,20 @@ impl VmDriver { .map_err(|err| Status::internal(format!("image rootfs preparation panicked: {err}")))?; if let Err(err) = build_result { + warn!( + image_ref = %image_ref, + error = %err, + "vm driver: rootfs archive build failed" + ); let _ = tokio::fs::remove_dir_all(&staging_dir).await; return Err(Status::failed_precondition(err)); } if tokio::fs::metadata(&archive_path).await.is_ok() { + info!( + image_identity = %image_identity, + "vm driver: another task wrote archive while we were building, discarding ours" + ); let _ = tokio::fs::remove_dir_all(&staging_dir).await; return Ok(()); } @@ -929,6 +1073,11 @@ impl VmDriver { tokio::fs::rename(&prepared_archive, &archive_path) .await .map_err(|err| Status::internal(format!("store cached image rootfs failed: {err}")))?; + info!( + image_identity = %image_identity, + archive_path = %archive_path.display(), + "vm driver: image rootfs archive committed to cache" + ); let _ = tokio::fs::remove_dir_all(&staging_dir).await; Ok(()) } @@ -1338,52 +1487,111 @@ fn image_reference_registry_host(image_ref: &str) -> &str { } } -async fn pull_registry_image_rootfs( - client: &OciClient, - reference: 
&Reference, - auth: &RegistryAuth, - image_ref: &str, - staging_dir: &Path, - rootfs: &Path, -) -> Result<(), Status> { - client - .auth(reference, auth, RegistryOperation::Pull) - .await - .map_err(|err| { - Status::failed_precondition(format!( - "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}" - )) - })?; - let (manifest, _) = client - .pull_image_manifest(reference, auth) - .await - .map_err(|err| { - Status::failed_precondition(format!( - "failed to pull vm sandbox image manifest '{image_ref}': {err}" - )) - })?; +impl VmDriver { + async fn pull_registry_image_rootfs( + &self, + sandbox_id: &str, + client: &OciClient, + reference: &Reference, + auth: &RegistryAuth, + image_ref: &str, + staging_dir: &Path, + rootfs: &Path, + ) -> Result<(), Status> { + client + .auth(reference, auth, RegistryOperation::Pull) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}" + )) + })?; + let (manifest, _) = client + .pull_image_manifest(reference, auth) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to pull vm sandbox image manifest '{image_ref}': {err}" + )) + })?; - tokio::fs::create_dir_all(rootfs) - .await - .map_err(|err| Status::internal(format!("create rootfs dir failed: {err}")))?; - tokio::fs::create_dir_all(staging_dir.join("layers")) - .await - .map_err(|err| Status::internal(format!("create layer staging dir failed: {err}")))?; + tokio::fs::create_dir_all(rootfs) + .await + .map_err(|err| Status::internal(format!("create rootfs dir failed: {err}")))?; + tokio::fs::create_dir_all(staging_dir.join("layers")) + .await + .map_err(|err| Status::internal(format!("create layer staging dir failed: {err}")))?; - for (index, layer) in manifest.layers.iter().enumerate() { - pull_registry_layer( - client, - reference, - image_ref, - staging_dir, - rootfs, - layer, - index, - ) - .await?; + let total_layers = 
manifest.layers.len(); + let total_bytes: i64 = manifest + .layers + .iter() + .map(|layer| layer.size.max(0)) + .sum(); + for (index, layer) in manifest.layers.iter().enumerate() { + // Emit a per-layer progress event so the CLI can show + // "Layer 3/8 (12.4 MB)" as detail under the spinner. + let mut metadata = HashMap::new(); + metadata.insert("layer_index".to_string(), (index + 1).to_string()); + metadata.insert("layer_total".to_string(), total_layers.to_string()); + metadata.insert("layer_digest".to_string(), layer.digest.clone()); + metadata.insert("layer_size_bytes".to_string(), layer.size.to_string()); + metadata.insert("image_ref".to_string(), image_ref.to_string()); + if total_bytes > 0 { + metadata.insert("image_size_bytes".to_string(), total_bytes.to_string()); + } + let mut event = platform_event( + "vm", + "Normal", + "PullingLayer", + format!( + "Pulling layer {}/{} ({} bytes) for image \"{image_ref}\"", + index + 1, + total_layers, + layer.size + ), + ); + event.metadata = metadata; + self.publish_platform_event(sandbox_id.to_string(), event); + + pull_registry_layer( + client, + reference, + image_ref, + staging_dir, + rootfs, + layer, + index, + ) + .await?; + } + + Ok(()) } - Ok(()) + /// Emit a `Pulled` platform event with a message that mirrors the + /// kubelet's `Successfully pulled image ... Image size: N bytes.` + /// format so the CLI's `extract_image_size` parser works unchanged. 
+ async fn publish_pulled_event( + &self, + sandbox_id: &str, + image_ref: &str, + archive_path: &Path, + ) { + let size_suffix = match tokio::fs::metadata(archive_path).await { + Ok(meta) => format!(" Image size: {} bytes.", meta.len()), + Err(_) => String::new(), + }; + self.publish_platform_event( + sandbox_id.to_string(), + platform_event( + "vm", + "Normal", + "Pulled", + format!("Successfully pulled image \"{image_ref}\".{size_suffix}"), + ), + ); + } } async fn pull_registry_layer( diff --git a/tasks/gateway.toml b/tasks/gateway.toml index 1df07cb95..23df38b3a 100644 --- a/tasks/gateway.toml +++ b/tasks/gateway.toml @@ -2,6 +2,26 @@ # SPDX-License-Identifier: Apache-2.0 # Standalone gateway tasks +# +# Canonical entry point: +# mise run gateway:start # driver = docker | vm +# +# Backward-compatible aliases: +# mise run gateway:docker +# mise run gateway:vm +# +# All three commands also write ~/.config/openshell/active_gateway so the +# `openshell` CLI automatically targets the gateway you just started — no +# need to follow up with `openshell gateway select`. Inside this repo you +# can override the active gateway per-developer by setting OPENSHELL_GATEWAY +# in `.env` (mise loads it automatically). + +["gateway:start"] +description = "Run a standalone gateway with the given compute driver (docker | vm)" +usage = """ +arg "[driver]" help="Compute driver to start (docker | vm). Defaults to $OPENSHELL_GATEWAY_DRIVER or 'docker'." +""" +run = "bash tasks/scripts/gateway-start.sh" ["gateway:docker"] description = "Run a standalone gateway with the bundled Docker compute driver" diff --git a/tasks/scripts/gateway-docker.sh b/tasks/scripts/gateway-docker.sh index 23527741f..44e2ec7c7 100644 --- a/tasks/scripts/gateway-docker.sh +++ b/tasks/scripts/gateway-docker.sh @@ -6,6 +6,10 @@ # Start a standalone openshell-gateway backed by the Docker compute driver for # local manual testing. 
# +# Invocation: +# mise run gateway:start docker # canonical +# mise run gateway:docker # alias +# # Defaults: # - Plaintext HTTP on 127.0.0.1:18080 # - Dedicated sandbox namespace "docker-dev" @@ -17,9 +21,11 @@ # OPENSHELL_SANDBOX_NAMESPACE=my-ns mise run gateway:docker # OPENSHELL_SANDBOX_IMAGE=ghcr.io/... mise run gateway:docker # -# After the gateway is running, point the CLI at it with either: -# openshell --gateway docker-dev -# openshell gateway use docker-dev # then plain `openshell ` +# This script also writes ~/.config/openshell/active_gateway so the +# `openshell` CLI automatically targets this gateway in subsequent shells. +# No need to run `openshell gateway select`. Inside this repo you can +# override per-developer with OPENSHELL_GATEWAY in `.env` (mise loads it). +# An explicit `--gateway` / `--gateway-endpoint` flag still wins. set -euo pipefail @@ -86,6 +92,18 @@ register_gateway_metadata() { EOF } +# Mirror what `openshell gateway select ` does: write the gateway name +# to $XDG_CONFIG_HOME/openshell/active_gateway. The CLI picks it up as the +# default target when neither --gateway nor OPENSHELL_GATEWAY is set. +save_active_gateway() { + local name=$1 + local config_home active_gateway_path + config_home="${XDG_CONFIG_HOME:-${HOME}/.config}" + active_gateway_path="${config_home}/openshell/active_gateway" + mkdir -p "$(dirname "${active_gateway_path}")" + printf '%s' "${name}" >"${active_gateway_path}" +} + if [[ ! "${GATEWAY_NAME}" =~ ^[A-Za-z0-9._-]+$ ]]; then echo "ERROR: OPENSHELL_DOCKER_GATEWAY_NAME must contain only letters, numbers, dots, underscores, or dashes" >&2 exit 2 @@ -161,6 +179,7 @@ mkdir -p "${STATE_DIR}" GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" +save_active_gateway "${GATEWAY_NAME}" echo "Starting standalone Docker gateway..." 
echo " gateway: ${GATEWAY_NAME}" @@ -168,9 +187,9 @@ echo " endpoint: ${GATEWAY_ENDPOINT}" echo " namespace: ${SANDBOX_NAMESPACE}" echo " state dir: ${STATE_DIR}" echo -echo "Point the CLI at this gateway with one of:" -echo " openshell --gateway ${GATEWAY_NAME} status" -echo " openshell gateway select ${GATEWAY_NAME}" +echo "Active gateway set to '${GATEWAY_NAME}'. The CLI now targets this gateway" +echo "by default — just run \`openshell \`. Override with --gateway" +echo "or by setting OPENSHELL_GATEWAY (e.g. in .env)." echo exec "${GATEWAY_BIN}" \ diff --git a/tasks/scripts/gateway-start.sh b/tasks/scripts/gateway-start.sh new file mode 100755 index 000000000..ee160db8d --- /dev/null +++ b/tasks/scripts/gateway-start.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Dispatcher for `mise run gateway:start `. +# +# Usage: +# mise run gateway:start # uses $OPENSHELL_GATEWAY_DRIVER or 'docker' +# mise run gateway:start docker +# mise run gateway:start vm +# +# This is a thin shim that forwards to the driver-specific script. The +# `gateway:docker` / `gateway:vm` mise aliases call those scripts directly. + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" + +DRIVER="${1:-${OPENSHELL_GATEWAY_DRIVER:-docker}}" + +case "${DRIVER}" in + docker) + exec bash "${ROOT}/tasks/scripts/gateway-docker.sh" + ;; + vm) + exec bash "${ROOT}/tasks/scripts/gateway-vm.sh" + ;; + *) + echo "ERROR: unknown gateway driver '${DRIVER}' (expected 'docker' or 'vm')" >&2 + echo "Usage: mise run gateway:start " >&2 + exit 2 + ;; +esac diff --git a/tasks/scripts/gateway-vm.sh b/tasks/scripts/gateway-vm.sh index f0623a46c..008f23b65 100755 --- a/tasks/scripts/gateway-vm.sh +++ b/tasks/scripts/gateway-vm.sh @@ -6,6 +6,10 @@ # Start a standalone openshell-gateway backed by the VM compute driver # (openshell-driver-vm) for local manual testing. # +# Invocation: +# mise run gateway:start vm # canonical +# mise run gateway:vm # alias +# # Defaults: # - Plaintext HTTP on 127.0.0.1:18081 # - Dedicated CLI gateway "vm-dev" @@ -20,9 +24,11 @@ # OPENSHELL_SANDBOX_NAMESPACE=my-ns mise run gateway:vm # OPENSHELL_SANDBOX_IMAGE=ghcr.io/... mise run gateway:vm # -# After the gateway is running, point the CLI at it with either: -# openshell --gateway vm-dev -# openshell gateway select vm-dev # then plain `openshell ` +# This script also writes ~/.config/openshell/active_gateway so the +# `openshell` CLI automatically targets this gateway in subsequent shells. +# No need to run `openshell gateway select`. Inside this repo you can +# override per-developer with OPENSHELL_GATEWAY in `.env` (mise loads it). +# An explicit `--gateway` / `--gateway-endpoint` flag still wins. set -euo pipefail @@ -95,6 +101,18 @@ register_gateway_metadata() { EOF } +# Mirror what `openshell gateway select ` does: write the gateway name +# to $XDG_CONFIG_HOME/openshell/active_gateway. The CLI picks it up as the +# default target when neither --gateway nor OPENSHELL_GATEWAY is set. 
+save_active_gateway() { + local name=$1 + local config_home active_gateway_path + config_home="${XDG_CONFIG_HOME:-${HOME}/.config}" + active_gateway_path="${config_home}/openshell/active_gateway" + mkdir -p "$(dirname "${active_gateway_path}")" + printf '%s' "${name}" >"${active_gateway_path}" +} + check_supervisor_cross_toolchain() { # The sandbox supervisor inside the guest is always Linux. On non-Linux # hosts (macOS) and on Linux hosts with a different arch than the guest, @@ -190,6 +208,7 @@ mkdir -p "${VM_DRIVER_STATE_DIR}" GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" +save_active_gateway "${GATEWAY_NAME}" echo "Starting standalone VM gateway..." echo " gateway: ${GATEWAY_NAME}" @@ -200,9 +219,9 @@ echo " driver: ${DRIVER_DIR}/openshell-driver-vm" echo " driver dir: ${VM_DRIVER_STATE_DIR}" echo " image: ${SANDBOX_IMAGE}" echo -echo "Point the CLI at this gateway with one of:" -echo " openshell --gateway ${GATEWAY_NAME} status" -echo " openshell gateway select ${GATEWAY_NAME}" +echo "Active gateway set to '${GATEWAY_NAME}'. The CLI now targets this gateway" +echo "by default — just run \`openshell \`. Override with --gateway" +echo "or by setting OPENSHELL_GATEWAY (e.g. in .env)." 
echo GATEWAY_ARGS=( From 616584bba5bcd0956d9f6f25b50e6f3284a06318 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 29 Apr 2026 16:05:08 -0700 Subject: [PATCH 06/11] wip --- crates/openshell-bootstrap/src/build.rs | 5 +- crates/openshell-cli/src/run.rs | 35 ++++---- crates/openshell-driver-vm/build.rs | 4 +- crates/openshell-driver-vm/src/driver.rs | 102 ++++++++++++----------- crates/openshell-driver-vm/src/rootfs.rs | 2 +- 5 files changed, 75 insertions(+), 73 deletions(-) diff --git a/crates/openshell-bootstrap/src/build.rs b/crates/openshell-bootstrap/src/build.rs index ecc4bffc9..d951f2833 100644 --- a/crates/openshell-bootstrap/src/build.rs +++ b/crates/openshell-bootstrap/src/build.rs @@ -56,8 +56,9 @@ pub fn encode_rootfs_tar_image_ref(path: &Path) -> Result { .canonicalize() .into_diagnostic() .wrap_err_with(|| format!("failed to resolve rootfs tar path {}", path.display()))?; - let file_url = Url::from_file_path(&canonical) - .map_err(|_| miette::miette!("failed to encode rootfs tar path {}", canonical.display()))?; + let file_url = Url::from_file_path(&canonical).map_err(|()| { + miette::miette!("failed to encode rootfs tar path {}", canonical.display()) + })?; Ok(format!( "{ROOTFS_TAR_IMAGE_REF_SCHEME}:{}", &file_url[Position::BeforePath..] diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 8d3568885..f47de664b 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -202,19 +202,6 @@ enum KubeEventReason { /// every log line (helpful when debugging stuck provisioning, e.g. on /// the experimental VM gateway). fn should_show_provisioning_log(line: &openshell_core::proto::SandboxLogLine) -> bool { - if std::env::var("OPENSHELL_PROVISION_VERBOSE") - .map(|v| !v.is_empty() && v != "0" && v.to_ascii_lowercase() != "false") - .unwrap_or(false) - { - return true; - } - - // Always surface warnings and errors during provisioning. 
- let level = line.level.to_ascii_lowercase(); - if matches!(level.as_str(), "warn" | "warning" | "error") { - return true; - } - // Allowlisted substrings for info-level progress lines emitted by // the server compute layer and bundled drivers (VM / Docker). Keep // this short — anything not matching is suppressed by default. @@ -230,6 +217,20 @@ fn should_show_provisioning_log(line: &openshell_core::proto::SandboxLogLine) -> "Sandbox ready", "Supervisor connected", ]; + + if std::env::var("OPENSHELL_PROVISION_VERBOSE") + .map(|v| !v.is_empty() && v != "0" && !v.eq_ignore_ascii_case("false")) + .unwrap_or(false) + { + return true; + } + + // Always surface warnings and errors during provisioning. + let level = line.level.to_ascii_lowercase(); + if matches!(level.as_str(), "warn" | "warning" | "error") { + return true; + } + ALLOWLIST.iter().any(|needle| line.message.contains(needle)) } @@ -2363,12 +2364,7 @@ pub async fn sandbox_create( } else { line.level.to_uppercase() }; - eprintln!( - " {} {} {}", - ts.dimmed(), - level.dimmed(), - line.message, - ); + eprintln!(" {} {} {}", ts.dimmed(), level.dimmed(), line.message,); } } } @@ -6251,6 +6247,7 @@ mod tests { #[tokio::test] async fn http_health_check_supports_plain_http_endpoints() { + let _ = rustls::crypto::ring::default_provider().install_default(); let listener = TcpListener::bind("127.0.0.1:0").expect("bind listener"); let addr = listener.local_addr().expect("listener addr"); let server = thread::spawn(move || { diff --git a/crates/openshell-driver-vm/build.rs b/crates/openshell-driver-vm/build.rs index 36b3eb183..2316093f7 100644 --- a/crates/openshell-driver-vm/build.rs +++ b/crates/openshell-driver-vm/build.rs @@ -7,7 +7,7 @@ //! artifacts it needs to boot VMs without depending on the openshell-vm binary //! or crate. 
-use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::{env, fs}; fn main() { @@ -134,7 +134,7 @@ fn main() { } } -fn generate_stub_resources(out_dir: &PathBuf, names: &[&str]) { +fn generate_stub_resources(out_dir: &Path, names: &[&str]) { for name in names { let path = out_dir.join(name); if !path.exists() { diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 5325152c8..6fc27dd69 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -53,9 +53,6 @@ const DRIVER_NAME: &str = "openshell-driver-vm"; const WATCH_BUFFER: usize = 256; const DEFAULT_VCPUS: u8 = 2; const DEFAULT_MEM_MIB: u32 = 2048; -/// gvproxy gateway IP — runs DNS, DHCP, and the gvproxy HTTP API. Does **not** -/// proxy arbitrary host ports. -const GVPROXY_GATEWAY_IP: &str = "192.168.127.1"; /// gvproxy host-loopback IP — gvproxy's TCP/UDP/ICMP forwarder NAT-rewrites /// this destination to the host's `127.0.0.1` and dials out from the host /// process. 
This is the only address that transparently reaches host-bound @@ -396,7 +393,10 @@ impl VmDriver { ), ); - let image_identity = match self.prepare_runtime_rootfs(&sandbox.id, &image_ref, &rootfs).await { + let image_identity = match self + .prepare_runtime_rootfs(&sandbox.id, &image_ref, &rootfs) + .await + { Ok(image_identity) => { info!( sandbox_id = %sandbox.id, @@ -415,13 +415,13 @@ impl VmDriver { return Err(err); } }; - if let Some(tls_paths) = tls_paths.as_ref() { - if let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await { - let _ = tokio::fs::remove_dir_all(&state_dir).await; - return Err(Status::internal(format!( - "prepare guest TLS materials failed: {err}" - ))); - } + if let Some(tls_paths) = tls_paths.as_ref() + && let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await + { + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(Status::internal(format!( + "prepare guest TLS materials failed: {err}" + ))); } if let Err(err) = @@ -578,12 +578,7 @@ impl VmDriver { // promotes the sandbox to `Ready` separately. 
self.publish_platform_event( sandbox.id.clone(), - platform_event( - "vm", - "Normal", - "Started", - "Started VM launcher".to_string(), - ), + platform_event("vm", "Normal", "Started", "Started VM launcher".to_string()), ); let snapshot = sandbox_snapshot(sandbox, provisioning_condition(), false); let process = Arc::new(Mutex::new(VmProcess { @@ -937,10 +932,7 @@ impl VmDriver { &image_identity_owned, ) .map_err(|err| { - format!( - "vm sandbox image '{}' is not base-compatible: {err}", - image_ref_owned - ) + format!("vm sandbox image '{image_ref_owned}' is not base-compatible: {err}") })?; create_rootfs_archive_from_dir(&prepared_rootfs_for_build, &prepared_archive_for_build) }) @@ -1041,10 +1033,7 @@ impl VmDriver { &image_identity_owned, ) .map_err(|err| { - format!( - "vm sandbox image '{}' is not base-compatible: {err}", - image_ref_owned - ) + format!("vm sandbox image '{image_ref_owned}' is not base-compatible: {err}") })?; create_rootfs_archive_from_dir(&prepared_rootfs_for_build, &prepared_archive_for_build) }) @@ -1405,6 +1394,7 @@ fn validate_vm_sandbox(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Statu Ok(()) } +#[allow(clippy::result_large_err)] fn parse_registry_reference(image_ref: &str) -> Result { Reference::try_from(image_ref).map_err(|err| { Status::failed_precondition(format!( @@ -1447,6 +1437,7 @@ fn linux_oci_arch() -> &'static str { } } +#[allow(clippy::result_large_err)] fn registry_auth(image_ref: &str) -> Result { let username = env_non_empty("OPENSHELL_REGISTRY_USERNAME"); let token = env_non_empty("OPENSHELL_REGISTRY_TOKEN"); @@ -1488,6 +1479,7 @@ fn image_reference_registry_host(image_ref: &str) -> &str { } impl VmDriver { + #[allow(clippy::too_many_arguments)] async fn pull_registry_image_rootfs( &self, sandbox_id: &str, @@ -1523,11 +1515,7 @@ impl VmDriver { .map_err(|err| Status::internal(format!("create layer staging dir failed: {err}")))?; let total_layers = manifest.layers.len(); - let total_bytes: i64 = manifest - 
.layers - .iter() - .map(|layer| layer.size.max(0)) - .sum(); + let total_bytes: i64 = manifest.layers.iter().map(|layer| layer.size.max(0)).sum(); for (index, layer) in manifest.layers.iter().enumerate() { // Emit a per-layer progress event so the CLI can show // "Layer 3/8 (12.4 MB)" as detail under the spinner. @@ -1572,16 +1560,11 @@ impl VmDriver { /// Emit a `Pulled` platform event with a message that mirrors the /// kubelet's `Successfully pulled image ... Image size: N bytes.` /// format so the CLI's `extract_image_size` parser works unchanged. - async fn publish_pulled_event( - &self, - sandbox_id: &str, - image_ref: &str, - archive_path: &Path, - ) { - let size_suffix = match tokio::fs::metadata(archive_path).await { - Ok(meta) => format!(" Image size: {} bytes.", meta.len()), - Err(_) => String::new(), - }; + async fn publish_pulled_event(&self, sandbox_id: &str, image_ref: &str, archive_path: &Path) { + let size_suffix = tokio::fs::metadata(archive_path).await.map_or_else( + |_| String::new(), + |meta| format!(" Image size: {} bytes.", meta.len()), + ); self.publish_platform_event( sandbox_id.to_string(), platform_event( @@ -1681,7 +1664,7 @@ fn compute_file_sha256(path: &Path) -> Result { fn compute_file_sha256_hex(path: &Path) -> Result { let mut file = fs::File::open(path).map_err(|err| format!("open {}: {err}", path.display()))?; let mut hasher = Sha256::new(); - let mut buffer = [0_u8; 64 * 1024]; + let mut buffer = vec![0_u8; 64 * 1024].into_boxed_slice(); loop { let read = file .read(&mut buffer) @@ -1724,6 +1707,9 @@ fn extract_tar_reader_to_dir(reader: impl Read, dest: &Path) -> Result<(), Strin .map_err(|err| format!("extract layer into {}: {err}", dest.display())) } +// `media_type` is an OCI media type string (e.g. `application/vnd.oci.image.layer.v1.tar+gzip`), +// not a filesystem path, so case-sensitive comparison is correct. 
+#[allow(clippy::case_sensitive_file_extension_comparisons)] fn layer_compression_from_media_type(media_type: &str) -> Result { if media_type.is_empty() { return Err("layer media type is missing".to_string()); @@ -1756,7 +1742,7 @@ fn merge_layer_directory(source_dir: &Path, target_dir: &Path) -> Result<(), Str .map_err(|err| format!("read {}: {err}", source_dir.display()))? .collect::, _>>() .map_err(|err| format!("read {}: {err}", source_dir.display()))?; - entries.sort_by_key(|entry| entry.file_name()); + entries.sort_by_key(fs::DirEntry::file_name); if entries .iter() @@ -2304,7 +2290,7 @@ mod tests { }), ..Default::default() }; - validate_vm_sandbox(&sandbox).expect("template.image should be accepted"); + validate_vm_sandbox(&sandbox, false).expect("template.image should be accepted"); } #[test] @@ -2318,6 +2304,11 @@ mod tests { registry: Arc::new(Mutex::new(HashMap::new())), image_cache_lock: Arc::new(Mutex::new(())), events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), }; assert_eq!(driver.capabilities().default_image, "openshell/sandbox:dev"); @@ -2334,6 +2325,11 @@ mod tests { registry: Arc::new(Mutex::new(HashMap::new())), image_cache_lock: Arc::new(Mutex::new(())), events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), }; let sandbox = Sandbox { spec: Some(SandboxSpec { @@ -2363,6 +2359,11 @@ mod tests { registry: Arc::new(Mutex::new(HashMap::new())), image_cache_lock: Arc::new(Mutex::new(())), events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), }; let sandbox = Sandbox { spec: Some(SandboxSpec { @@ -2386,6 +2387,11 @@ mod tests { registry: 
Arc::new(Mutex::new(HashMap::new())), image_cache_lock: Arc::new(Mutex::new(())), events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), }; let sandbox = Sandbox { spec: Some(SandboxSpec { @@ -2502,14 +2508,12 @@ mod tests { format!("http://{OPENSHELL_HOST_GATEWAY_ALIAS}:8080") ); assert_eq!( - guest_visible_openshell_endpoint(&format!( - "http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080" - )), + guest_visible_openshell_endpoint(&format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080")), format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080") ); assert_eq!( - guest_visible_openshell_endpoint(&format!("http://{GVPROXY_GATEWAY_IP}:8080")), - format!("http://{GVPROXY_GATEWAY_IP}:8080") + guest_visible_openshell_endpoint("http://192.168.127.1:8080"), + "http://192.168.127.1:8080" ); assert_eq!( guest_visible_openshell_endpoint("https://gateway.internal:8443"), @@ -2689,7 +2693,7 @@ mod tests { fn validate_openshell_endpoint_accepts_host_gateway() { validate_openshell_endpoint("http://host.containers.internal:8080") .expect("guest-reachable host alias should be accepted"); - validate_openshell_endpoint(&format!("http://{GVPROXY_GATEWAY_IP}:8080")) + validate_openshell_endpoint("http://192.168.127.1:8080") .expect("gateway IP should be accepted"); validate_openshell_endpoint(&format!("http://{OPENSHELL_HOST_GATEWAY_ALIAS}:8080")) .expect("openshell host alias should be accepted"); diff --git a/crates/openshell-driver-vm/src/rootfs.rs b/crates/openshell-driver-vm/src/rootfs.rs index 929641945..4eeb28917 100644 --- a/crates/openshell-driver-vm/src/rootfs.rs +++ b/crates/openshell-driver-vm/src/rootfs.rs @@ -74,7 +74,7 @@ fn append_rootfs_tree_to_archive( .map_err(|e| format!("read {}: {e}", source.display()))? 
.collect::, _>>() .map_err(|e| format!("read {}: {e}", source.display()))?; - entries.sort_by_key(|entry| entry.file_name()); + entries.sort_by_key(fs::DirEntry::file_name); for entry in entries { let entry_name = entry.file_name(); From 7d73d335fbf274bba5219d04263f7ca02f15c910 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 29 Apr 2026 19:14:29 -0700 Subject: [PATCH 07/11] cleanup --- Cargo.lock | 2 + architecture/sandbox-custom-containers.md | 5 +- crates/openshell-bootstrap/Cargo.toml | 2 + crates/openshell-bootstrap/src/build.rs | 178 +++++++++++++++++++++ crates/openshell-bootstrap/src/metadata.rs | 16 ++ crates/openshell-cli/src/completers.rs | 2 + crates/openshell-cli/src/main.rs | 2 + crates/openshell-cli/src/run.rs | 96 ++++------- crates/openshell-driver-vm/src/driver.rs | 101 +++++++++++- crates/openshell-driver-vm/src/main.rs | 4 + crates/openshell-server/src/lib.rs | 77 ++++----- tasks/gateway.toml | 20 --- tasks/scripts/gateway-docker.sh | 31 +--- tasks/scripts/gateway-start.sh | 34 ---- tasks/scripts/gateway-vm.sh | 66 +++++++- 15 files changed, 437 insertions(+), 199 deletions(-) delete mode 100755 tasks/scripts/gateway-start.sh diff --git a/Cargo.lock b/Cargo.lock index f93440a39..572bd5f3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3278,11 +3278,13 @@ dependencies = [ "bollard", "bytes", "futures", + "hmac", "miette", "openshell-core", "rcgen", "serde", "serde_json", + "sha2 0.10.9", "tar", "tempfile", "tokio", diff --git a/architecture/sandbox-custom-containers.md b/architecture/sandbox-custom-containers.md index 5d482ffe0..69e5c10d4 100644 --- a/architecture/sandbox-custom-containers.md +++ b/architecture/sandbox-custom-containers.md @@ -33,8 +33,9 @@ The community registry prefix defaults to `ghcr.io/nvidia/openshell-community/sa When `--from` points to a Dockerfile or directory, the CLI: 1. Builds the image locally via the Docker daemon (respecting `.dockerignore`). -2. 
Pushes it into the cluster's containerd runtime using `docker save` / `ctr import`. -3. Creates the sandbox with the resulting image tag. +2. For a local Kubernetes gateway, pushes it into the cluster's containerd runtime using `docker save` / `ctr import`. +3. For a local VM gateway, exports the built image as a rootfs tar artifact and passes the VM driver a gateway-signed internal artifact reference. The driver rejects unsigned `openshell-rootfs-tar:` references so API clients cannot point it at arbitrary gateway host files. +4. Creates the sandbox with the resulting image tag or VM artifact reference. ## How It Works diff --git a/crates/openshell-bootstrap/Cargo.toml b/crates/openshell-bootstrap/Cargo.toml index 30fd4fbfc..821817fd8 100644 --- a/crates/openshell-bootstrap/Cargo.toml +++ b/crates/openshell-bootstrap/Cargo.toml @@ -16,10 +16,12 @@ base64 = "0.22" bollard = { version = "0.20", features = ["ssh"] } bytes = { workspace = true } futures = { workspace = true } +hmac = "0.12" miette = { workspace = true } rcgen = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +sha2 = "0.10" url = { workspace = true } tar = "0.4" tempfile = "3" diff --git a/crates/openshell-bootstrap/src/build.rs b/crates/openshell-bootstrap/src/build.rs index d951f2833..3191b88b1 100644 --- a/crates/openshell-bootstrap/src/build.rs +++ b/crates/openshell-bootstrap/src/build.rs @@ -9,15 +9,21 @@ //! runtime, while the VM backend can export the built image as a rootfs tar. 
use std::collections::HashMap; +use std::fs::File; +use std::io::Read; use std::path::{Path, PathBuf}; +use base64::Engine as _; +use base64::engine::general_purpose::URL_SAFE_NO_PAD; use bollard::Docker; use bollard::models::ContainerCreateBody; use bollard::query_parameters::{ BuildImageOptionsBuilder, CreateContainerOptionsBuilder, RemoveContainerOptionsBuilder, }; use futures::StreamExt; +use hmac::{Hmac, Mac}; use miette::{IntoDiagnostic, Result, WrapErr}; +use sha2::{Digest, Sha256}; use tokio::io::AsyncWriteExt; use url::{Position, Url}; @@ -26,6 +32,15 @@ use crate::push::push_local_images; /// Pseudo-image URI scheme used to hand a local rootfs tar artifact to the VM driver. pub const ROOTFS_TAR_IMAGE_REF_SCHEME: &str = "openshell-rootfs-tar"; +const ROOTFS_TAR_IMAGE_REF_VERSION: &str = "v1"; +type HmacSha256 = Hmac; + +/// Authenticated VM rootfs tar artifact decoded from an internal image reference. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RootfsTarImageRef { + pub path: PathBuf, + pub digest: String, +} /// Build a container image from a Dockerfile using the local Docker daemon. /// @@ -65,6 +80,25 @@ pub fn encode_rootfs_tar_image_ref(path: &Path) -> Result { )) } +/// Encode a local rootfs tar path as an authenticated VM-driver artifact reference. 
+pub fn encode_authenticated_rootfs_tar_image_ref(path: &Path, secret: &str) -> Result { + let canonical = path + .canonicalize() + .into_diagnostic() + .wrap_err_with(|| format!("failed to resolve rootfs tar path {}", path.display()))?; + let digest = compute_file_sha256_hex(&canonical)?; + let file_url = Url::from_file_path(&canonical).map_err(|()| { + miette::miette!("failed to encode rootfs tar path {}", canonical.display()) + })?; + let file_url = file_url.to_string(); + let signature = sign_rootfs_tar_image_ref(secret, &file_url, &digest)?; + Ok(format!( + "{ROOTFS_TAR_IMAGE_REF_SCHEME}:{ROOTFS_TAR_IMAGE_REF_VERSION}:{}:{digest}:{}", + URL_SAFE_NO_PAD.encode(file_url.as_bytes()), + URL_SAFE_NO_PAD.encode(signature), + )) +} + /// Decode a VM-driver rootfs tar image reference back to a local filesystem path. pub fn decode_rootfs_tar_image_ref(image_ref: &str) -> Option { let remainder = image_ref.strip_prefix(&format!("{ROOTFS_TAR_IMAGE_REF_SCHEME}:"))?; @@ -72,6 +106,62 @@ pub fn decode_rootfs_tar_image_ref(image_ref: &str) -> Option { Url::parse(&file_url).ok()?.to_file_path().ok() } +/// Decode and verify an authenticated VM-driver rootfs tar image reference. 
+pub fn decode_authenticated_rootfs_tar_image_ref( + image_ref: &str, + secret: &str, +) -> Result> { + let Some(remainder) = image_ref.strip_prefix(&format!( + "{ROOTFS_TAR_IMAGE_REF_SCHEME}:{ROOTFS_TAR_IMAGE_REF_VERSION}:" + )) else { + return Ok(None); + }; + + let mut parts = remainder.split(':'); + let encoded_url = parts + .next() + .filter(|value| !value.is_empty()) + .ok_or_else(|| miette::miette!("missing rootfs artifact path"))?; + let digest = parts + .next() + .filter(|value| !value.is_empty()) + .ok_or_else(|| miette::miette!("missing rootfs artifact digest"))?; + let encoded_signature = parts + .next() + .filter(|value| !value.is_empty()) + .ok_or_else(|| miette::miette!("missing rootfs artifact signature"))?; + if parts.next().is_some() { + return Err(miette::miette!("malformed rootfs artifact reference")); + } + validate_sha256_hex(digest)?; + + let file_url_bytes = URL_SAFE_NO_PAD + .decode(encoded_url) + .map_err(|err| miette::miette!("invalid rootfs artifact path encoding: {err}"))?; + let file_url = String::from_utf8(file_url_bytes) + .map_err(|err| miette::miette!("invalid rootfs artifact path encoding: {err}"))?; + + let signature = URL_SAFE_NO_PAD + .decode(encoded_signature) + .map_err(|err| miette::miette!("invalid rootfs artifact signature encoding: {err}"))?; + verify_rootfs_tar_image_ref_signature(secret, &file_url, digest, &signature)?; + + let url = Url::parse(&file_url) + .into_diagnostic() + .wrap_err("invalid rootfs artifact URL")?; + if url.scheme() != "file" { + return Err(miette::miette!("rootfs artifact URL must use file scheme")); + } + let path = url + .to_file_path() + .map_err(|()| miette::miette!("rootfs artifact URL is not a local file path"))?; + + Ok(Some(RootfsTarImageRef { + path, + digest: format!("sha256:{digest}"), + })) +} + /// Export a locally-built Docker image as a persistent rootfs tar artifact for the VM driver. 
pub async fn export_local_image_rootfs( image_ref: &str, @@ -103,6 +193,63 @@ pub async fn export_local_image_rootfs( Ok(output_path) } +fn compute_file_sha256_hex(path: &Path) -> Result { + let mut file = File::open(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to open {}", path.display()))?; + let mut hasher = Sha256::new(); + let mut buf = [0u8; 64 * 1024]; + loop { + let read = file + .read(&mut buf) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read {}", path.display()))?; + if read == 0 { + break; + } + hasher.update(&buf[..read]); + } + Ok(format!("{:x}", hasher.finalize())) +} + +fn validate_sha256_hex(value: &str) -> Result<()> { + if value.len() == 64 && value.bytes().all(|byte| byte.is_ascii_hexdigit()) { + Ok(()) + } else { + Err(miette::miette!("rootfs artifact digest must be sha256 hex")) + } +} + +fn rootfs_tar_image_ref_message(file_url: &str, digest: &str) -> String { + format!("{ROOTFS_TAR_IMAGE_REF_VERSION}\n{file_url}\nsha256:{digest}") +} + +fn rootfs_tar_image_ref_hmac(secret: &str) -> Result { + if secret.trim().is_empty() { + return Err(miette::miette!("rootfs artifact secret is empty")); + } + HmacSha256::new_from_slice(secret.as_bytes()) + .map_err(|err| miette::miette!("invalid rootfs artifact secret: {err}")) +} + +fn sign_rootfs_tar_image_ref(secret: &str, file_url: &str, digest: &str) -> Result> { + let mut mac = rootfs_tar_image_ref_hmac(secret)?; + mac.update(rootfs_tar_image_ref_message(file_url, digest).as_bytes()); + Ok(mac.finalize().into_bytes().to_vec()) +} + +fn verify_rootfs_tar_image_ref_signature( + secret: &str, + file_url: &str, + digest: &str, + signature: &[u8], +) -> Result<()> { + let mut mac = rootfs_tar_image_ref_hmac(secret)?; + mac.update(rootfs_tar_image_ref_message(file_url, digest).as_bytes()); + mac.verify_slice(signature) + .map_err(|_| miette::miette!("rootfs artifact signature verification failed")) +} + /// Push a locally-built image into the gateway's containerd runtime. 
#[allow(clippy::implicit_hasher)] pub async fn push_image_into_gateway( @@ -639,4 +786,35 @@ mod tests { assert_eq!(decoded, tar_path.canonicalize().unwrap()); } + + #[test] + fn authenticated_rootfs_tar_image_ref_round_trip() { + let dir = tempfile::tempdir().unwrap(); + let tar_path = dir.path().join("rootfs tar.tar"); + fs::write(&tar_path, "rootfs").unwrap(); + + let encoded = encode_authenticated_rootfs_tar_image_ref(&tar_path, "secret").unwrap(); + let decoded = decode_authenticated_rootfs_tar_image_ref(&encoded, "secret") + .unwrap() + .unwrap(); + + assert_eq!(decoded.path, tar_path.canonicalize().unwrap()); + assert_eq!( + decoded.digest, + "sha256:3c47ef972d531d524daa15fa33dd885dd23de6221bbd10a29eb42ecfcf2ef422" + ); + } + + #[test] + fn authenticated_rootfs_tar_image_ref_rejects_wrong_secret() { + let dir = tempfile::tempdir().unwrap(); + let tar_path = dir.path().join("rootfs.tar"); + fs::write(&tar_path, "rootfs").unwrap(); + + let encoded = encode_authenticated_rootfs_tar_image_ref(&tar_path, "secret").unwrap(); + let err = decode_authenticated_rootfs_tar_image_ref(&encoded, "wrong-secret") + .expect_err("wrong secret should fail"); + + assert!(err.to_string().contains("signature verification failed")); + } } diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs index 41e75e811..5892dd15f 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -46,6 +46,14 @@ pub struct GatewayMetadata { alias = "cf_auth_url" )] pub edge_auth_url: Option, + + /// Local VM driver state directory for standalone VM gateways. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub vm_driver_state_dir: Option, + + /// Local secret used to authenticate VM rootfs artifact references. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub vm_rootfs_artifact_secret: Option, } impl GatewayMetadata { @@ -136,6 +144,8 @@ pub fn create_gateway_metadata_with_host( auth_mode: disable_tls.then(|| "plaintext".to_string()), edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, } } @@ -463,6 +473,8 @@ mod tests { auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; let json = serde_json::to_string(&meta).unwrap(); let parsed: GatewayMetadata = serde_json::from_str(&json).unwrap(); @@ -558,6 +570,8 @@ mod tests { auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; assert_eq!(meta.gateway_host(), None); } @@ -574,6 +588,8 @@ mod tests { auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; assert_eq!(meta.gateway_host(), Some("10.0.0.5")); } diff --git a/crates/openshell-cli/src/completers.rs b/crates/openshell-cli/src/completers.rs index c8b5c82a3..31a98158c 100644 --- a/crates/openshell-cli/src/completers.rs +++ b/crates/openshell-cli/src/completers.rs @@ -183,6 +183,8 @@ mod tests { auth_mode: Some("cloudflare_jwt".to_string()), edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }, ) .unwrap(); diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 57f3dbc84..da7b3d13a 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -2845,6 +2845,8 @@ mod tests { auth_mode: Some("cloudflare_jwt".to_string()), edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index f47de664b..edb3da982 100644 --- 
a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -194,46 +194,6 @@ enum KubeEventReason { Started, } -/// Decide whether a server/driver log line should be surfaced during -/// `sandbox create` provisioning. -/// -/// Defaults to a curated allowlist so the spinner stays focused on -/// meaningful progress. Set `OPENSHELL_PROVISION_VERBOSE=1` to surface -/// every log line (helpful when debugging stuck provisioning, e.g. on -/// the experimental VM gateway). -fn should_show_provisioning_log(line: &openshell_core::proto::SandboxLogLine) -> bool { - // Allowlisted substrings for info-level progress lines emitted by - // the server compute layer and bundled drivers (VM / Docker). Keep - // this short — anything not matching is suppressed by default. - const ALLOWLIST: &[&str] = &[ - "Sandbox phase changed", - "Pulling image", - "Pulled image", - "Extracting", - "Preparing rootfs", - "Booting VM", - "Starting VM", - "Starting sandbox", - "Sandbox ready", - "Supervisor connected", - ]; - - if std::env::var("OPENSHELL_PROVISION_VERBOSE") - .map(|v| !v.is_empty() && v != "0" && !v.eq_ignore_ascii_case("false")) - .unwrap_or(false) - { - return true; - } - - // Always surface warnings and errors during provisioning. - let level = line.level.to_ascii_lowercase(); - if matches!(level.as_str(), "warn" | "warning" | "error") { - return true; - } - - ALLOWLIST.iter().any(|needle| line.message.contains(needle)) -} - /// Map a Kubernetes event reason string to an enum. 
fn parse_kube_event_reason(reason: &str) -> Option { match reason { @@ -937,6 +897,8 @@ fn plaintext_gateway_metadata( auth_mode: Some("plaintext".to_string()), edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, } } @@ -1138,6 +1100,8 @@ pub async fn gateway_add( auth_mode: Some("mtls".to_string()), edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; store_gateway_metadata(name, &metadata)?; @@ -1167,6 +1131,8 @@ pub async fn gateway_add( auth_mode: Some("cloudflare_jwt".to_string()), edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; store_gateway_metadata(name, &metadata)?; @@ -2342,31 +2308,6 @@ pub async fn sandbox_create( if !saw_gateway_ready && line.message.contains("listening") { saw_gateway_ready = true; } - - // Surface log lines as progress so users aren't staring at a - // silent spinner while non-Kubernetes drivers (VM, Docker) do - // their work. Drivers/server tracing with a `sandbox_id` - // field flows through here as Log payloads. - // - // The default filter keeps output focused on user-relevant - // progress: warn/error always, plus a curated allowlist of - // info messages. Set OPENSHELL_PROVISION_VERBOSE=1 to see - // every log line during provisioning. - if !line.message.is_empty() && should_show_provisioning_log(&line) { - if let Some(d) = display.as_mut() { - // Interactive: tuck the message under the spinner - // as detail so the checklist stays clean. - d.set_active_detail(&line.message); - } else { - let ts = format_timestamp(provision_start.elapsed()); - let level = if line.level.is_empty() { - "INFO".to_string() - } else { - line.level.to_uppercase() - }; - eprintln!(" {} {} {}", ts.dimmed(), level.dimmed(), line.message,); - } - } } Some(openshell_core::proto::sandbox_stream_event::Payload::Event(ev)) => { // Map Kubernetes events to provisioning steps. 
@@ -2794,7 +2735,20 @@ async fn build_from_dockerfile( let rootfs_tar = openshell_bootstrap::build::export_local_image_rootfs(&tag, &mut on_log) .await .wrap_err("failed to export built image as a VM rootfs artifact")?; - let artifact_ref = openshell_bootstrap::build::encode_rootfs_tar_image_ref(&rootfs_tar)?; + let artifact_secret = metadata + .as_ref() + .and_then(|metadata| metadata.vm_rootfs_artifact_secret.as_deref()) + .filter(|secret| !secret.trim().is_empty()) + .ok_or_else(|| { + miette!( + "local Dockerfile sources for VM gateways require authenticated rootfs artifact metadata; restart gateway '{}' with a current `mise run gateway:vm`", + gateway_name + ) + })?; + let artifact_ref = openshell_bootstrap::build::encode_authenticated_rootfs_tar_image_ref( + &rootfs_tar, + artifact_secret, + )?; eprintln!(); eprintln!( @@ -5705,6 +5659,8 @@ mod tests { auth_mode: Some("cloudflare_jwt".to_string()), edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, } } @@ -5890,6 +5846,8 @@ mod tests { auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; assert!(!dockerfile_sources_supported_for_gateway(Some(&metadata))); @@ -5907,6 +5865,8 @@ mod tests { auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; assert!(dockerfile_sources_supported_for_gateway(Some(&metadata))); @@ -6128,6 +6088,8 @@ mod tests { auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }, ]; @@ -6163,6 +6125,8 @@ mod tests { auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, + vm_rootfs_artifact_secret: None, }; assert_eq!(gateway_auth_label(&gateway), "mtls"); diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 6fc27dd69..0270c5a5d 
100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -17,7 +17,9 @@ use oci_client::client::{Client as OciClient, ClientConfig}; use oci_client::manifest::{ImageIndexEntry, OciDescriptor}; use oci_client::secrets::RegistryAuth; use oci_client::{Reference, RegistryOperation}; -use openshell_bootstrap::build::decode_rootfs_tar_image_ref; +use openshell_bootstrap::build::{ + ROOTFS_TAR_IMAGE_REF_SCHEME, decode_authenticated_rootfs_tar_image_ref, +}; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, @@ -116,6 +118,7 @@ pub struct VmDriverConfig { pub gpu_enabled: bool, pub gpu_mem_mib: u32, pub gpu_vcpus: u8, + pub rootfs_artifact_secret: Option, } impl Default for VmDriverConfig { @@ -137,6 +140,7 @@ impl Default for VmDriverConfig { gpu_enabled: false, gpu_mem_mib: 8192, gpu_vcpus: 4, + rootfs_artifact_secret: None, } } } @@ -756,9 +760,32 @@ impl VmDriver { sandbox_id: &str, image_ref: &str, ) -> Result { - if let Some(rootfs_tar_path) = decode_rootfs_tar_image_ref(image_ref) { + if is_rootfs_tar_image_ref(image_ref) { + let secret = self + .config + .rootfs_artifact_secret + .as_deref() + .filter(|secret| !secret.trim().is_empty()) + .ok_or_else(|| { + Status::failed_precondition( + "vm rootfs tar image references require a gateway-issued artifact secret", + ) + })?; + let artifact = decode_authenticated_rootfs_tar_image_ref(image_ref, secret) + .map_err(|err| { + Status::failed_precondition(format!( + "invalid vm rootfs artifact reference: {err}" + )) + })? 
+ .ok_or_else(|| { + Status::failed_precondition("invalid vm rootfs artifact reference") + })?; return self - .ensure_cached_rootfs_tar_image_rootfs_archive(image_ref, &rootfs_tar_path) + .ensure_cached_rootfs_tar_image_rootfs_archive( + image_ref, + &artifact.path, + &artifact.digest, + ) .await; } @@ -853,6 +880,7 @@ impl VmDriver { &self, image_ref: &str, rootfs_tar_path: &Path, + expected_digest: &str, ) -> Result { let rootfs_tar = rootfs_tar_path.to_path_buf(); let image_identity = tokio::task::spawn_blocking(move || compute_file_sha256(&rootfs_tar)) @@ -866,6 +894,11 @@ impl VmDriver { rootfs_tar_path.display() )) })?; + if image_identity != expected_digest { + return Err(Status::failed_precondition( + "vm rootfs artifact digest does not match the authenticated reference", + )); + } let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); if tokio::fs::metadata(&archive_path).await.is_ok() { @@ -1403,6 +1436,12 @@ fn parse_registry_reference(image_ref: &str) -> Result { }) } +fn is_rootfs_tar_image_ref(image_ref: &str) -> bool { + image_ref + .strip_prefix(ROOTFS_TAR_IMAGE_REF_SCHEME) + .is_some_and(|rest| rest.starts_with(':')) +} + fn registry_client() -> OciClient { OciClient::new(ClientConfig { platform_resolver: Some(Box::new(linux_platform_resolver)), @@ -1470,8 +1509,12 @@ fn env_non_empty(key: &str) -> Option { } fn image_reference_registry_host(image_ref: &str) -> &str { - let first = image_ref.split('/').next().unwrap_or(image_ref); - if first.contains('.') || first.contains(':') || first.eq_ignore_ascii_case("localhost") { + let mut parts = image_ref.splitn(2, '/'); + let first = parts.next().unwrap_or(image_ref); + let has_path = parts.next().is_some(); + if has_path + && (first.contains('.') || first.contains(':') || first.eq_ignore_ascii_case("localhost")) + { first } else { "docker.io" @@ -2524,16 +2567,49 @@ mod tests { #[test] fn image_reference_registry_host_defaults_to_docker_hub() { 
assert_eq!(image_reference_registry_host("ubuntu:24.04"), "docker.io"); + assert_eq!( + image_reference_registry_host("library/ubuntu:24.04"), + "docker.io" + ); assert_eq!( image_reference_registry_host("ghcr.io/nvidia/openshell/base:latest"), "ghcr.io" ); + assert_eq!( + image_reference_registry_host("localhost/example:dev"), + "localhost" + ); assert_eq!( image_reference_registry_host("localhost:5000/example/sandbox:dev"), "localhost:5000" ); } + #[tokio::test] + async fn ensure_cached_image_rootfs_archive_rejects_unsigned_rootfs_tar_refs() { + let base = unique_temp_dir(); + fs::create_dir_all(&base).unwrap(); + let artifact = base.join("rootfs.tar"); + fs::write(&artifact, "not a real rootfs").unwrap(); + let image_ref = openshell_bootstrap::build::encode_rootfs_tar_image_ref(&artifact).unwrap(); + let driver = test_driver(VmDriverConfig { + state_dir: base.join("driver-state"), + ..Default::default() + }); + + let err = driver + .ensure_cached_image_rootfs_archive("sandbox-123", &image_ref) + .await + .expect_err("unsigned rootfs tar refs must be rejected"); + + assert_eq!(err.code(), Code::FailedPrecondition); + assert!( + err.message().contains("gateway-issued artifact secret"), + "unexpected error: {}", + err.message() + ); + } + #[test] fn apply_layer_dir_to_rootfs_honors_whiteouts() { let base = unique_temp_dir(); @@ -2823,6 +2899,21 @@ mod tests { )) } + fn test_driver(config: VmDriverConfig) -> VmDriver { + VmDriver { + config, + launcher_bin: PathBuf::from("openshell-driver-vm"), + registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), + events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), + } + } + fn spawn_exited_child() -> Child { Command::new("sh") .arg("-c") diff --git a/crates/openshell-driver-vm/src/main.rs b/crates/openshell-driver-vm/src/main.rs index 596e6c88d..3f5d70c14 
100644 --- a/crates/openshell-driver-vm/src/main.rs +++ b/crates/openshell-driver-vm/src/main.rs @@ -105,6 +105,9 @@ struct Args { #[arg(long, env = "OPENSHELL_VM_GPU_VCPUS", default_value_t = 4)] gpu_vcpus: u8, + #[arg(long, env = "OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET", hide = true)] + rootfs_artifact_secret: Option, + #[arg(long, hide = true)] vm_backend: Option, @@ -185,6 +188,7 @@ async fn main() -> Result<()> { gpu_enabled: args.gpu, gpu_mem_mib: args.gpu_mem_mib, gpu_vcpus: args.gpu_vcpus, + rootfs_artifact_secret: args.rootfs_artifact_secret, }) .await .map_err(|err| miette::miette!("{err}"))?; diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 6c59e9822..456d885fe 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -148,10 +148,14 @@ pub async fn run_server( if database_url.is_empty() { return Err(Error::config("database_url is required")); } - // The ssh_handshake_secret check is driver-scoped: the Kubernetes and - // Podman drivers inject the secret into sandbox env, while the VM and - // Docker drivers do not consume it. The check is enforced per-driver - // in `build_compute_runtime`. + let driver = configured_compute_driver(&config)?; + if config.ssh_handshake_secret.is_empty() + && !matches!(driver, ComputeDriverKind::Docker | ComputeDriverKind::Vm) + { + return Err(Error::config( + "ssh_handshake_secret is required. 
Set --ssh-handshake-secret or OPENSHELL_SSH_HANDSHAKE_SECRET", + )); + } let store = Arc::new(Store::connect(database_url).await?); @@ -366,35 +370,32 @@ async fn build_compute_runtime( info!(driver = %driver, "Using compute driver"); match driver { - ComputeDriverKind::Kubernetes => { - require_ssh_handshake_secret(config)?; - ComputeRuntime::new_kubernetes( - KubernetesComputeConfig { - namespace: config.sandbox_namespace.clone(), - default_image: config.sandbox_image.clone(), - image_pull_policy: config.sandbox_image_pull_policy.clone(), - grpc_endpoint: config.grpc_endpoint.clone(), - // Filesystem path to the supervisor's Unix-socket SSH daemon. - // The path lives in a root-only directory so only the - // supervisor can connect; the gateway reaches it through - // the RelayStream bridge, not directly. Override via - // `sandbox_ssh_socket_path` in the config for deployments - // where multiple supervisors share a filesystem. - ssh_socket_path: config.sandbox_ssh_socket_path.clone(), - ssh_handshake_secret: config.ssh_handshake_secret.clone(), - ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, - client_tls_secret_name: config.client_tls_secret_name.clone(), - host_gateway_ip: config.host_gateway_ip.clone(), - }, - store, - sandbox_index, - sandbox_watch_bus, - tracing_log_bus, - supervisor_sessions.clone(), - ) - .await - .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) - } + ComputeDriverKind::Kubernetes => ComputeRuntime::new_kubernetes( + KubernetesComputeConfig { + namespace: config.sandbox_namespace.clone(), + default_image: config.sandbox_image.clone(), + image_pull_policy: config.sandbox_image_pull_policy.clone(), + grpc_endpoint: config.grpc_endpoint.clone(), + // Filesystem path to the supervisor's Unix-socket SSH daemon. + // The path lives in a root-only directory so only the + // supervisor can connect; the gateway reaches it through the + // RelayStream bridge, not directly. 
Override via + // `sandbox_ssh_socket_path` in the config for deployments + // where multiple supervisors share a filesystem. + ssh_socket_path: config.sandbox_ssh_socket_path.clone(), + ssh_handshake_secret: config.ssh_handshake_secret.clone(), + ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, + client_tls_secret_name: config.client_tls_secret_name.clone(), + host_gateway_ip: config.host_gateway_ip.clone(), + }, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions.clone(), + ) + .await + .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))), ComputeDriverKind::Docker => ComputeRuntime::new_docker( config.clone(), docker_config.clone(), @@ -421,7 +422,6 @@ async fn build_compute_runtime( .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) } ComputeDriverKind::Podman => { - require_ssh_handshake_secret(config)?; let socket_path = std::env::var("OPENSHELL_PODMAN_SOCKET") .ok() .filter(|s| !s.is_empty()) @@ -473,15 +473,6 @@ async fn build_compute_runtime( } } -fn require_ssh_handshake_secret(config: &Config) -> Result<()> { - if config.ssh_handshake_secret.is_empty() { - return Err(Error::config( - "ssh_handshake_secret is required for this driver. 
Set --ssh-handshake-secret or OPENSHELL_SSH_HANDSHAKE_SECRET", - )); - } - Ok(()) -} - fn configured_compute_driver(config: &Config) -> Result { match config.compute_drivers.as_slice() { [] => Err(Error::config( diff --git a/tasks/gateway.toml b/tasks/gateway.toml index 23df38b3a..1df07cb95 100644 --- a/tasks/gateway.toml +++ b/tasks/gateway.toml @@ -2,26 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # Standalone gateway tasks -# -# Canonical entry point: -# mise run gateway:start # driver = docker | vm -# -# Backward-compatible aliases: -# mise run gateway:docker -# mise run gateway:vm -# -# All three commands also write ~/.config/openshell/active_gateway so the -# `openshell` CLI automatically targets the gateway you just started — no -# need to follow up with `openshell gateway select`. Inside this repo you -# can override the active gateway per-developer by setting OPENSHELL_GATEWAY -# in `.env` (mise loads it automatically). - -["gateway:start"] -description = "Run a standalone gateway with the given compute driver (docker | vm)" -usage = """ -arg "[driver]" help="Compute driver to start (docker | vm). Defaults to $OPENSHELL_GATEWAY_DRIVER or 'docker'." -""" -run = "bash tasks/scripts/gateway-start.sh" ["gateway:docker"] description = "Run a standalone gateway with the bundled Docker compute driver" diff --git a/tasks/scripts/gateway-docker.sh b/tasks/scripts/gateway-docker.sh index 44e2ec7c7..23527741f 100644 --- a/tasks/scripts/gateway-docker.sh +++ b/tasks/scripts/gateway-docker.sh @@ -6,10 +6,6 @@ # Start a standalone openshell-gateway backed by the Docker compute driver for # local manual testing. # -# Invocation: -# mise run gateway:start docker # canonical -# mise run gateway:docker # alias -# # Defaults: # - Plaintext HTTP on 127.0.0.1:18080 # - Dedicated sandbox namespace "docker-dev" @@ -21,11 +17,9 @@ # OPENSHELL_SANDBOX_NAMESPACE=my-ns mise run gateway:docker # OPENSHELL_SANDBOX_IMAGE=ghcr.io/... 
mise run gateway:docker # -# This script also writes ~/.config/openshell/active_gateway so the -# `openshell` CLI automatically targets this gateway in subsequent shells. -# No need to run `openshell gateway select`. Inside this repo you can -# override per-developer with OPENSHELL_GATEWAY in `.env` (mise loads it). -# An explicit `--gateway` / `--gateway-endpoint` flag still wins. +# After the gateway is running, point the CLI at it with either: +# openshell --gateway docker-dev +# openshell gateway use docker-dev # then plain `openshell ` set -euo pipefail @@ -92,18 +86,6 @@ register_gateway_metadata() { EOF } -# Mirror what `openshell gateway select ` does: write the gateway name -# to $XDG_CONFIG_HOME/openshell/active_gateway. The CLI picks it up as the -# default target when neither --gateway nor OPENSHELL_GATEWAY is set. -save_active_gateway() { - local name=$1 - local config_home active_gateway_path - config_home="${XDG_CONFIG_HOME:-${HOME}/.config}" - active_gateway_path="${config_home}/openshell/active_gateway" - mkdir -p "$(dirname "${active_gateway_path}")" - printf '%s' "${name}" >"${active_gateway_path}" -} - if [[ ! "${GATEWAY_NAME}" =~ ^[A-Za-z0-9._-]+$ ]]; then echo "ERROR: OPENSHELL_DOCKER_GATEWAY_NAME must contain only letters, numbers, dots, underscores, or dashes" >&2 exit 2 @@ -179,7 +161,6 @@ mkdir -p "${STATE_DIR}" GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" -save_active_gateway "${GATEWAY_NAME}" echo "Starting standalone Docker gateway..." echo " gateway: ${GATEWAY_NAME}" @@ -187,9 +168,9 @@ echo " endpoint: ${GATEWAY_ENDPOINT}" echo " namespace: ${SANDBOX_NAMESPACE}" echo " state dir: ${STATE_DIR}" echo -echo "Active gateway set to '${GATEWAY_NAME}'. The CLI now targets this gateway" -echo "by default — just run \`openshell \`. Override with --gateway" -echo "or by setting OPENSHELL_GATEWAY (e.g. in .env)." 
+echo "Point the CLI at this gateway with one of:" +echo " openshell --gateway ${GATEWAY_NAME} status" +echo " openshell gateway select ${GATEWAY_NAME}" echo exec "${GATEWAY_BIN}" \ diff --git a/tasks/scripts/gateway-start.sh b/tasks/scripts/gateway-start.sh deleted file mode 100755 index ee160db8d..000000000 --- a/tasks/scripts/gateway-start.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Dispatcher for `mise run gateway:start `. -# -# Usage: -# mise run gateway:start # uses $OPENSHELL_GATEWAY_DRIVER or 'docker' -# mise run gateway:start docker -# mise run gateway:start vm -# -# This is a thin shim that forwards to the driver-specific script. The -# `gateway:docker` / `gateway:vm` mise aliases call those scripts directly. - -set -euo pipefail - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" - -DRIVER="${1:-${OPENSHELL_GATEWAY_DRIVER:-docker}}" - -case "${DRIVER}" in - docker) - exec bash "${ROOT}/tasks/scripts/gateway-docker.sh" - ;; - vm) - exec bash "${ROOT}/tasks/scripts/gateway-vm.sh" - ;; - *) - echo "ERROR: unknown gateway driver '${DRIVER}' (expected 'docker' or 'vm')" >&2 - echo "Usage: mise run gateway:start " >&2 - exit 2 - ;; -esac diff --git a/tasks/scripts/gateway-vm.sh b/tasks/scripts/gateway-vm.sh index 008f23b65..e67f61347 100755 --- a/tasks/scripts/gateway-vm.sh +++ b/tasks/scripts/gateway-vm.sh @@ -7,8 +7,7 @@ # (openshell-driver-vm) for local manual testing. # # Invocation: -# mise run gateway:start vm # canonical -# mise run gateway:vm # alias +# mise run gateway:vm # # Defaults: # - Plaintext HTTP on 127.0.0.1:18081 @@ -23,6 +22,7 @@ # OPENSHELL_VM_GATEWAY_NAME=my-vm-gateway mise run gateway:vm # OPENSHELL_SANDBOX_NAMESPACE=my-ns mise run gateway:vm # OPENSHELL_SANDBOX_IMAGE=ghcr.io/... 
mise run gateway:vm +# mise run gateway:vm -- --gpu # # This script also writes ~/.config/openshell/active_gateway so the # `openshell` CLI automatically targets this gateway in subsequent shells. @@ -84,21 +84,27 @@ register_gateway_metadata() { local name=$1 local endpoint=$2 local port=$3 + local vm_driver_state_dir=$4 + local rootfs_artifact_secret=$5 local config_home gateway_dir config_home="${XDG_CONFIG_HOME:-${HOME}/.config}" gateway_dir="${config_home}/openshell/gateways/${name}" mkdir -p "${gateway_dir}" + chmod 700 "${gateway_dir}" 2>/dev/null || true cat >"${gateway_dir}/metadata.json" </dev/null || true } # Mirror what `openshell gateway select ` does: write the gateway name @@ -146,6 +152,51 @@ check_supervisor_cross_toolchain() { fi } +generate_rootfs_artifact_secret() { + od -An -N32 -tx1 /dev/urandom | tr -d ' \n' +} + +VM_GPU="$(normalize_bool "${OPENSHELL_VM_GPU:-false}")" + +while [ "$#" -gt 0 ]; do + case "$1" in + --gpu) + VM_GPU="true" + shift + ;; + --gpu-mem-mib) + if [ "$#" -lt 2 ]; then + echo "ERROR: --gpu-mem-mib requires a value" >&2 + exit 2 + fi + export OPENSHELL_VM_GPU_MEM_MIB="$2" + shift 2 + ;; + --gpu-vcpus) + if [ "$#" -lt 2 ]; then + echo "ERROR: --gpu-vcpus requires a value" >&2 + exit 2 + fi + export OPENSHELL_VM_GPU_VCPUS="$2" + shift 2 + ;; + -h|--help) + echo "Usage: mise run gateway:vm -- [--gpu] [--gpu-mem-mib MIB] [--gpu-vcpus N]" + exit 0 + ;; + *) + echo "ERROR: unknown gateway-vm option '$1'" >&2 + exit 2 + ;; + esac +done + +if [ "${VM_GPU}" = "true" ]; then + export OPENSHELL_VM_GPU="true" +else + unset OPENSHELL_VM_GPU +fi + if [[ ! 
"${GATEWAY_NAME}" =~ ^[A-Za-z0-9._-]+$ ]]; then echo "ERROR: OPENSHELL_VM_GATEWAY_NAME must contain only letters, numbers, dots, underscores, or dashes" >&2 exit 2 @@ -168,6 +219,12 @@ VM_DRIVER_STATE_DIR_DEFAULT="${OPENSHELL_VM_DRIVER_STATE_ROOT:-/tmp}/openshell-v VM_DRIVER_STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${VM_DRIVER_STATE_DIR_DEFAULT}}" DISABLE_TLS="$(normalize_bool "${OPENSHELL_DISABLE_TLS:-true}")" +ROOTFS_ARTIFACT_SECRET="${OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET:-$(generate_rootfs_artifact_secret)}" +if [[ ! "${ROOTFS_ARTIFACT_SECRET}" =~ ^[A-Za-z0-9._~=-]+$ ]]; then + echo "ERROR: OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET must contain only URL-safe characters" >&2 + exit 2 +fi +export OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET="${ROOTFS_ARTIFACT_SECRET}" # Build prerequisites: VM runtime artifacts + bundled supervisor. if [ ! -d "${COMPRESSED_DIR}" ] \ @@ -207,7 +264,7 @@ mkdir -p "${STATE_DIR}" mkdir -p "${VM_DRIVER_STATE_DIR}" GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" -register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" +register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" "${VM_DRIVER_STATE_DIR}" "${ROOTFS_ARTIFACT_SECRET}" save_active_gateway "${GATEWAY_NAME}" echo "Starting standalone VM gateway..." @@ -217,6 +274,7 @@ echo " namespace: ${SANDBOX_NAMESPACE}" echo " state dir: ${STATE_DIR}" echo " driver: ${DRIVER_DIR}/openshell-driver-vm" echo " driver dir: ${VM_DRIVER_STATE_DIR}" +echo " gpu: ${VM_GPU}" echo " image: ${SANDBOX_IMAGE}" echo echo "Active gateway set to '${GATEWAY_NAME}'. 
The CLI now targets this gateway" From 3032cc47a191e45fcb3200224c9704ca48cbeaae Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 29 Apr 2026 20:44:27 -0700 Subject: [PATCH 08/11] refactor(vm): move local image handling into driver --- Cargo.lock | 5 +- architecture/custom-vm-runtime.md | 23 +- architecture/sandbox-custom-containers.md | 11 +- crates/openshell-bootstrap/Cargo.toml | 3 - crates/openshell-bootstrap/src/build.rs | 333 +--------------- crates/openshell-bootstrap/src/metadata.rs | 8 - crates/openshell-cli/src/completers.rs | 1 - crates/openshell-cli/src/main.rs | 1 - crates/openshell-cli/src/run.rs | 52 ++- .../sandbox_create_lifecycle_integration.rs | 6 +- crates/openshell-driver-vm/Cargo.toml | 2 +- crates/openshell-driver-vm/README.md | 4 +- crates/openshell-driver-vm/src/driver.rs | 355 ++++++++++++------ crates/openshell-driver-vm/src/main.rs | 4 - crates/openshell-vm/src/lib.rs | 1 + docs/sandboxes/manage-sandboxes.mdx | 4 + tasks/scripts/gateway-vm.sh | 16 +- 17 files changed, 303 insertions(+), 526 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 572bd5f3f..168972961 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3278,18 +3278,15 @@ dependencies = [ "bollard", "bytes", "futures", - "hmac", "miette", "openshell-core", "rcgen", "serde", "serde_json", - "sha2 0.10.9", "tar", "tempfile", "tokio", "tracing", - "url", ] [[package]] @@ -3422,6 +3419,7 @@ dependencies = [ name = "openshell-driver-vm" version = "0.0.0" dependencies = [ + "bollard", "clap", "flate2", "futures", @@ -3430,7 +3428,6 @@ dependencies = [ "miette", "nix", "oci-client", - "openshell-bootstrap", "openshell-core", "openshell-vfio", "polling", diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index 3c721ae50..55f9a8b1f 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -76,9 +76,12 @@ Old runtime cache versions are cleaned up when a new version is extracted. 
### Sandbox rootfs preparation Each VM sandbox starts from either a registry image fetched directly over OCI or -a local rootfs tar artifact exported by the CLI for Dockerfile-based `--from` -sources, then the driver **rewrites that filesystem into a supervisor-only -sandbox guest** before caching it: +a local Docker image reference produced by Dockerfile-based `--from` sources. +For local Dockerfile sources, the CLI builds the image on the local Docker +daemon and passes the VM driver an internal `openshell-vm-local-image:` +reference. The driver resolves that tag on the gateway host, exports the image +filesystem, and **rewrites that filesystem into a supervisor-only sandbox +guest** before caching it: - `/srv/openshell-vm-sandbox-init.sh` is installed as the guest entrypoint - the bundled `openshell-sandbox` binary is copied into @@ -108,7 +111,7 @@ sandbox image. The driver: - resolves the image on the gateway host without Docker for registry and community image refs - for local Dockerfile sources, the CLI builds through the host Docker socket - and hands the VM driver a local rootfs tar artifact instead of a Docker tag + and hands the VM driver an internal local-image ref instead of a registry ref - unpacks the image filesystem, injects the VM sandbox init/supervisor files, and validates required guest tools such as `bash`, `mount`, `ip`, and `sed` - caches the prepared guest rootfs under @@ -116,16 +119,20 @@ sandbox image. The driver: - extracts a private runtime copy under `/sandboxes//rootfs` -The cache key uses an immutable image identity: repo digest when available, -otherwise a SHA-256 fingerprint of the local rootfs tar artifact. +The cache key uses an immutable image identity: repo digest for registry images +and the local Docker image ID for local-image refs. Different VM sandboxes can use different base images concurrently because the shared cache is per image, not global for the driver. 
Cached prepared rootfs entries remain on disk until the operator removes them from the VM driver state directory. Docker is therefore no longer required for VM sandboxes created from registry or -community image refs. It is only required on the CLI host when the source is a -local Dockerfile or build context. +community image refs. It is only required on the local CLI/gateway host when the +source is a local Dockerfile or build context. + +Local Dockerfile sources are treated as trusted local-development inputs for VM +gateways. Remote VM gateways still reject local Dockerfile sources until a +gateway-side artifact validation and transfer boundary is designed. There is no embedded guest rootfs fallback anymore. VM sandboxes therefore require either `template.image` or a configured default sandbox image. This is diff --git a/architecture/sandbox-custom-containers.md b/architecture/sandbox-custom-containers.md index 69e5c10d4..cb1546db2 100644 --- a/architecture/sandbox-custom-containers.md +++ b/architecture/sandbox-custom-containers.md @@ -9,7 +9,7 @@ The `--from` flag accepts four kinds of input: | Input | Example | Behavior | |-------|---------|----------| | **Community sandbox name** | `--from openclaw` | Resolves to `ghcr.io/nvidia/openshell-community/sandboxes/openclaw:latest` | -| **Dockerfile path** | `--from ./Dockerfile` | Builds the image, pushes it into the cluster, then creates the sandbox | +| **Dockerfile path** | `--from ./Dockerfile` | Builds the image locally, makes it available to the local gateway, then creates the sandbox | | **Directory with Dockerfile** | `--from ./my-sandbox/` | Uses the directory as the build context | | **Full image reference** | `--from myregistry.com/img:tag` | Uses the image directly | @@ -34,8 +34,13 @@ When `--from` points to a Dockerfile or directory, the CLI: 1. Builds the image locally via the Docker daemon (respecting `.dockerignore`). 2. 
For a local Kubernetes gateway, pushes it into the cluster's containerd runtime using `docker save` / `ctr import`. -3. For a local VM gateway, exports the built image as a rootfs tar artifact and passes the VM driver a gateway-signed internal artifact reference. The driver rejects unsigned `openshell-rootfs-tar:` references so API clients cannot point it at arbitrary gateway host files. -4. Creates the sandbox with the resulting image tag or VM artifact reference. +3. For a local VM gateway, passes the VM driver an internal `openshell-vm-local-image:` reference. The driver resolves that tag against the local Docker daemon, exports the image filesystem, and prepares the VM rootfs in its own cache. +4. Creates the sandbox with the resulting image tag or VM-local image reference. + +Local Dockerfile sources for VM gateways are trusted local-development inputs. +Remote gateways continue to reject local Dockerfile sources because the gateway +API does not yet validate or transfer local build artifacts across that +boundary. ## How It Works diff --git a/crates/openshell-bootstrap/Cargo.toml b/crates/openshell-bootstrap/Cargo.toml index 821817fd8..942ffc48b 100644 --- a/crates/openshell-bootstrap/Cargo.toml +++ b/crates/openshell-bootstrap/Cargo.toml @@ -16,13 +16,10 @@ base64 = "0.22" bollard = { version = "0.20", features = ["ssh"] } bytes = { workspace = true } futures = { workspace = true } -hmac = "0.12" miette = { workspace = true } rcgen = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -sha2 = "0.10" -url = { workspace = true } tar = "0.4" tempfile = "3" tokio = { workspace = true } diff --git a/crates/openshell-bootstrap/src/build.rs b/crates/openshell-bootstrap/src/build.rs index 3191b88b1..a313d4394 100644 --- a/crates/openshell-bootstrap/src/build.rs +++ b/crates/openshell-bootstrap/src/build.rs @@ -1,47 +1,25 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -//! Build and export container images for gateway runtimes. +//! Build container images for gateway runtimes. //! //! This module wraps bollard's `build_image()` API to build a container image //! from a Dockerfile and build context. Kubernetes deployments reuse the //! existing push pipeline to import the image into the gateway's containerd -//! runtime, while the VM backend can export the built image as a rootfs tar. +//! runtime. VM deployments keep the built image in the local Docker daemon and +//! pass an internal local-image reference to the VM driver. use std::collections::HashMap; -use std::fs::File; -use std::io::Read; -use std::path::{Path, PathBuf}; +use std::path::Path; -use base64::Engine as _; -use base64::engine::general_purpose::URL_SAFE_NO_PAD; use bollard::Docker; -use bollard::models::ContainerCreateBody; -use bollard::query_parameters::{ - BuildImageOptionsBuilder, CreateContainerOptionsBuilder, RemoveContainerOptionsBuilder, -}; +use bollard::query_parameters::BuildImageOptionsBuilder; use futures::StreamExt; -use hmac::{Hmac, Mac}; use miette::{IntoDiagnostic, Result, WrapErr}; -use sha2::{Digest, Sha256}; -use tokio::io::AsyncWriteExt; -use url::{Position, Url}; use crate::constants::container_name; use crate::push::push_local_images; -/// Pseudo-image URI scheme used to hand a local rootfs tar artifact to the VM driver. -pub const ROOTFS_TAR_IMAGE_REF_SCHEME: &str = "openshell-rootfs-tar"; -const ROOTFS_TAR_IMAGE_REF_VERSION: &str = "v1"; -type HmacSha256 = Hmac; - -/// Authenticated VM rootfs tar artifact decoded from an internal image reference. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct RootfsTarImageRef { - pub path: PathBuf, - pub digest: String, -} - /// Build a container image from a Dockerfile using the local Docker daemon. 
/// /// This is used by `openshell sandbox create --from ` for both the @@ -65,191 +43,6 @@ pub async fn build_local_image( Ok(()) } -/// Encode a local rootfs tar path as an internal image reference understood by the VM driver. -pub fn encode_rootfs_tar_image_ref(path: &Path) -> Result { - let canonical = path - .canonicalize() - .into_diagnostic() - .wrap_err_with(|| format!("failed to resolve rootfs tar path {}", path.display()))?; - let file_url = Url::from_file_path(&canonical).map_err(|()| { - miette::miette!("failed to encode rootfs tar path {}", canonical.display()) - })?; - Ok(format!( - "{ROOTFS_TAR_IMAGE_REF_SCHEME}:{}", - &file_url[Position::BeforePath..] - )) -} - -/// Encode a local rootfs tar path as an authenticated VM-driver artifact reference. -pub fn encode_authenticated_rootfs_tar_image_ref(path: &Path, secret: &str) -> Result { - let canonical = path - .canonicalize() - .into_diagnostic() - .wrap_err_with(|| format!("failed to resolve rootfs tar path {}", path.display()))?; - let digest = compute_file_sha256_hex(&canonical)?; - let file_url = Url::from_file_path(&canonical).map_err(|()| { - miette::miette!("failed to encode rootfs tar path {}", canonical.display()) - })?; - let file_url = file_url.to_string(); - let signature = sign_rootfs_tar_image_ref(secret, &file_url, &digest)?; - Ok(format!( - "{ROOTFS_TAR_IMAGE_REF_SCHEME}:{ROOTFS_TAR_IMAGE_REF_VERSION}:{}:{digest}:{}", - URL_SAFE_NO_PAD.encode(file_url.as_bytes()), - URL_SAFE_NO_PAD.encode(signature), - )) -} - -/// Decode a VM-driver rootfs tar image reference back to a local filesystem path. -pub fn decode_rootfs_tar_image_ref(image_ref: &str) -> Option { - let remainder = image_ref.strip_prefix(&format!("{ROOTFS_TAR_IMAGE_REF_SCHEME}:"))?; - let file_url = format!("file:{remainder}"); - Url::parse(&file_url).ok()?.to_file_path().ok() -} - -/// Decode and verify an authenticated VM-driver rootfs tar image reference. 
-pub fn decode_authenticated_rootfs_tar_image_ref( - image_ref: &str, - secret: &str, -) -> Result> { - let Some(remainder) = image_ref.strip_prefix(&format!( - "{ROOTFS_TAR_IMAGE_REF_SCHEME}:{ROOTFS_TAR_IMAGE_REF_VERSION}:" - )) else { - return Ok(None); - }; - - let mut parts = remainder.split(':'); - let encoded_url = parts - .next() - .filter(|value| !value.is_empty()) - .ok_or_else(|| miette::miette!("missing rootfs artifact path"))?; - let digest = parts - .next() - .filter(|value| !value.is_empty()) - .ok_or_else(|| miette::miette!("missing rootfs artifact digest"))?; - let encoded_signature = parts - .next() - .filter(|value| !value.is_empty()) - .ok_or_else(|| miette::miette!("missing rootfs artifact signature"))?; - if parts.next().is_some() { - return Err(miette::miette!("malformed rootfs artifact reference")); - } - validate_sha256_hex(digest)?; - - let file_url_bytes = URL_SAFE_NO_PAD - .decode(encoded_url) - .map_err(|err| miette::miette!("invalid rootfs artifact path encoding: {err}"))?; - let file_url = String::from_utf8(file_url_bytes) - .map_err(|err| miette::miette!("invalid rootfs artifact path encoding: {err}"))?; - - let signature = URL_SAFE_NO_PAD - .decode(encoded_signature) - .map_err(|err| miette::miette!("invalid rootfs artifact signature encoding: {err}"))?; - verify_rootfs_tar_image_ref_signature(secret, &file_url, digest, &signature)?; - - let url = Url::parse(&file_url) - .into_diagnostic() - .wrap_err("invalid rootfs artifact URL")?; - if url.scheme() != "file" { - return Err(miette::miette!("rootfs artifact URL must use file scheme")); - } - let path = url - .to_file_path() - .map_err(|()| miette::miette!("rootfs artifact URL is not a local file path"))?; - - Ok(Some(RootfsTarImageRef { - path, - digest: format!("sha256:{digest}"), - })) -} - -/// Export a locally-built Docker image as a persistent rootfs tar artifact for the VM driver. 
-pub async fn export_local_image_rootfs( - image_ref: &str, - on_log: &mut impl FnMut(String), -) -> Result { - let temp = tempfile::Builder::new() - .prefix("openshell-vm-rootfs-") - .suffix(".tar") - .tempfile() - .into_diagnostic() - .wrap_err("failed to allocate temporary VM rootfs artifact")?; - let temp_path = temp.path().to_path_buf(); - let (_file, output_path) = temp.keep().into_diagnostic().wrap_err_with(|| { - format!( - "failed to persist temporary VM rootfs artifact {}", - temp_path.display() - ) - })?; - - on_log(format!( - "Exporting built image {image_ref} as VM rootfs artifact {}", - output_path.display() - )); - export_local_image_rootfs_to_path(image_ref, &output_path).await?; - on_log(format!( - "Exported VM rootfs artifact {}", - output_path.display() - )); - Ok(output_path) -} - -fn compute_file_sha256_hex(path: &Path) -> Result { - let mut file = File::open(path) - .into_diagnostic() - .wrap_err_with(|| format!("failed to open {}", path.display()))?; - let mut hasher = Sha256::new(); - let mut buf = [0u8; 64 * 1024]; - loop { - let read = file - .read(&mut buf) - .into_diagnostic() - .wrap_err_with(|| format!("failed to read {}", path.display()))?; - if read == 0 { - break; - } - hasher.update(&buf[..read]); - } - Ok(format!("{:x}", hasher.finalize())) -} - -fn validate_sha256_hex(value: &str) -> Result<()> { - if value.len() == 64 && value.bytes().all(|byte| byte.is_ascii_hexdigit()) { - Ok(()) - } else { - Err(miette::miette!("rootfs artifact digest must be sha256 hex")) - } -} - -fn rootfs_tar_image_ref_message(file_url: &str, digest: &str) -> String { - format!("{ROOTFS_TAR_IMAGE_REF_VERSION}\n{file_url}\nsha256:{digest}") -} - -fn rootfs_tar_image_ref_hmac(secret: &str) -> Result { - if secret.trim().is_empty() { - return Err(miette::miette!("rootfs artifact secret is empty")); - } - HmacSha256::new_from_slice(secret.as_bytes()) - .map_err(|err| miette::miette!("invalid rootfs artifact secret: {err}")) -} - -fn 
sign_rootfs_tar_image_ref(secret: &str, file_url: &str, digest: &str) -> Result> { - let mut mac = rootfs_tar_image_ref_hmac(secret)?; - mac.update(rootfs_tar_image_ref_message(file_url, digest).as_bytes()); - Ok(mac.finalize().into_bytes().to_vec()) -} - -fn verify_rootfs_tar_image_ref_signature( - secret: &str, - file_url: &str, - digest: &str, - signature: &[u8], -) -> Result<()> { - let mut mac = rootfs_tar_image_ref_hmac(secret)?; - mac.update(rootfs_tar_image_ref_message(file_url, digest).as_bytes()); - mac.verify_slice(signature) - .map_err(|_| miette::miette!("rootfs artifact signature verification failed")) -} - /// Push a locally-built image into the gateway's containerd runtime. #[allow(clippy::implicit_hasher)] pub async fn push_image_into_gateway( @@ -360,79 +153,6 @@ async fn build_image( Ok(()) } -async fn export_local_image_rootfs_to_path(image_ref: &str, tar_path: &Path) -> Result<()> { - let docker = Docker::connect_with_local_defaults() - .into_diagnostic() - .wrap_err("failed to connect to local Docker daemon")?; - let container_name = format!( - "openshell-rootfs-export-{}", - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() - ); - let create_options = CreateContainerOptionsBuilder::default() - .name(container_name.as_str()) - .build(); - let container = docker - .create_container( - Some(create_options), - ContainerCreateBody { - image: Some(image_ref.to_string()), - ..Default::default() - }, - ) - .await - .into_diagnostic() - .wrap_err_with(|| { - format!("failed to create temporary export container for image {image_ref}") - })?; - let container_id = container.id; - - let export_result = async { - if let Some(parent) = tar_path.parent() { - tokio::fs::create_dir_all(parent) - .await - .into_diagnostic() - .wrap_err_with(|| format!("failed to create {}", parent.display()))?; - } - let mut file = tokio::fs::File::create(tar_path) - .await - .into_diagnostic() - .wrap_err_with(|| 
format!("failed to create {}", tar_path.display()))?; - let mut stream = docker.export_container(&container_id); - while let Some(chunk) = stream.next().await { - let chunk = chunk - .into_diagnostic() - .wrap_err_with(|| format!("failed to export image {image_ref}"))?; - file.write_all(&chunk) - .await - .into_diagnostic() - .wrap_err_with(|| format!("failed to write {}", tar_path.display()))?; - } - file.flush() - .await - .into_diagnostic() - .wrap_err_with(|| format!("failed to flush {}", tar_path.display())) - } - .await; - - let cleanup_result = docker - .remove_container( - &container_id, - Some(RemoveContainerOptionsBuilder::default().force(true).build()), - ) - .await; - - match (export_result, cleanup_result) { - (Ok(()), Ok(())) => Ok(()), - (Err(err), _) => Err(err), - (Ok(()), Err(err)) => Err(err).into_diagnostic().wrap_err_with(|| { - format!("failed to remove temporary export container for {image_ref}") - }), - } -} - /// Create a tar archive of a directory for use as a Docker build context. 
/// /// Walks `context_dir` recursively, respects a `.dockerignore` file if present, @@ -774,47 +494,4 @@ mod tests { assert!(is_ignored("node_modules", true, &patterns)); assert!(is_ignored("node_modules/foo.js", false, &patterns)); } - - #[test] - fn encode_and_decode_rootfs_tar_image_ref_round_trip() { - let dir = tempfile::tempdir().unwrap(); - let tar_path = dir.path().join("rootfs tar.tar"); - fs::write(&tar_path, "rootfs").unwrap(); - - let encoded = encode_rootfs_tar_image_ref(&tar_path).unwrap(); - let decoded = decode_rootfs_tar_image_ref(&encoded).unwrap(); - - assert_eq!(decoded, tar_path.canonicalize().unwrap()); - } - - #[test] - fn authenticated_rootfs_tar_image_ref_round_trip() { - let dir = tempfile::tempdir().unwrap(); - let tar_path = dir.path().join("rootfs tar.tar"); - fs::write(&tar_path, "rootfs").unwrap(); - - let encoded = encode_authenticated_rootfs_tar_image_ref(&tar_path, "secret").unwrap(); - let decoded = decode_authenticated_rootfs_tar_image_ref(&encoded, "secret") - .unwrap() - .unwrap(); - - assert_eq!(decoded.path, tar_path.canonicalize().unwrap()); - assert_eq!( - decoded.digest, - "sha256:3c47ef972d531d524daa15fa33dd885dd23de6221bbd10a29eb42ecfcf2ef422" - ); - } - - #[test] - fn authenticated_rootfs_tar_image_ref_rejects_wrong_secret() { - let dir = tempfile::tempdir().unwrap(); - let tar_path = dir.path().join("rootfs.tar"); - fs::write(&tar_path, "rootfs").unwrap(); - - let encoded = encode_authenticated_rootfs_tar_image_ref(&tar_path, "secret").unwrap(); - let err = decode_authenticated_rootfs_tar_image_ref(&encoded, "wrong-secret") - .expect_err("wrong secret should fail"); - - assert!(err.to_string().contains("signature verification failed")); - } } diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs index 5892dd15f..ee6509e93 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -50,10 +50,6 @@ pub struct GatewayMetadata { 
/// Local VM driver state directory for standalone VM gateways. #[serde(default, skip_serializing_if = "Option::is_none")] pub vm_driver_state_dir: Option, - - /// Local secret used to authenticate VM rootfs artifact references. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub vm_rootfs_artifact_secret: Option, } impl GatewayMetadata { @@ -145,7 +141,6 @@ pub fn create_gateway_metadata_with_host( edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, } } @@ -474,7 +469,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; let json = serde_json::to_string(&meta).unwrap(); let parsed: GatewayMetadata = serde_json::from_str(&json).unwrap(); @@ -571,7 +565,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; assert_eq!(meta.gateway_host(), None); } @@ -589,7 +582,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; assert_eq!(meta.gateway_host(), Some("10.0.0.5")); } diff --git a/crates/openshell-cli/src/completers.rs b/crates/openshell-cli/src/completers.rs index 31a98158c..3b87e4b88 100644 --- a/crates/openshell-cli/src/completers.rs +++ b/crates/openshell-cli/src/completers.rs @@ -184,7 +184,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }, ) .unwrap(); diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index da7b3d13a..7a8c93a36 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -2846,7 +2846,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index edb3da982..ee8ea0c36 100644 --- 
a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -898,7 +898,6 @@ fn plaintext_gateway_metadata( edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, } } @@ -1101,7 +1100,6 @@ pub async fn gateway_add( edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; store_gateway_metadata(name, &metadata)?; @@ -1132,7 +1130,6 @@ pub async fn gateway_add( edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; store_gateway_metadata(name, &metadata)?; @@ -2663,6 +2660,12 @@ fn image_requests_gpu(image: &str) -> bool { image_name.contains("gpu") } +const VM_LOCAL_IMAGE_REF_SCHEME: &str = "openshell-vm-local-image"; + +fn vm_local_image_ref(image_ref: &str) -> String { + format!("{VM_LOCAL_IMAGE_REF_SCHEME}:{image_ref}") +} + fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>) -> bool { !metadata.is_some_and(|metadata| metadata.is_remote) } @@ -2671,8 +2674,9 @@ fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>) /// /// For local Kubernetes gateways running in Docker, this imports the built image /// into the gateway runtime and returns the Docker tag. For local VM gateways, -/// this exports the built image as a rootfs tar artifact and returns an internal -/// pseudo-image URI understood by the VM driver. +/// this returns an internal local-image URI. The VM driver resolves that URI +/// against the local Docker daemon and prepares the VM rootfs on the gateway +/// host. 
async fn build_from_dockerfile( dockerfile: &Path, context: &Path, @@ -2732,34 +2736,18 @@ async fn build_from_dockerfile( return Ok(tag); } - let rootfs_tar = openshell_bootstrap::build::export_local_image_rootfs(&tag, &mut on_log) - .await - .wrap_err("failed to export built image as a VM rootfs artifact")?; - let artifact_secret = metadata - .as_ref() - .and_then(|metadata| metadata.vm_rootfs_artifact_secret.as_deref()) - .filter(|secret| !secret.trim().is_empty()) - .ok_or_else(|| { - miette!( - "local Dockerfile sources for VM gateways require authenticated rootfs artifact metadata; restart gateway '{}' with a current `mise run gateway:vm`", - gateway_name - ) - })?; - let artifact_ref = openshell_bootstrap::build::encode_authenticated_rootfs_tar_image_ref( - &rootfs_tar, - artifact_secret, - )?; + let local_image_ref = vm_local_image_ref(&tag); eprintln!(); eprintln!( - "{} VM rootfs artifact {} is ready for gateway '{}'.", + "{} Image {} will be resolved by the local VM driver for gateway '{}'.", "✓".green().bold(), - rootfs_tar.display().to_string().cyan(), + tag.cyan(), gateway_name, ); eprintln!(); - Ok(artifact_ref) + Ok(local_image_ref) } /// Load sandbox policy YAML. 
@@ -5583,6 +5571,7 @@ mod tests { parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message, ready_false_condition_message, resolve_gateway_control_target_from, sandbox_should_persist, shell_escape, source_requests_gpu, validate_gateway_name, validate_ssh_host, + vm_local_image_ref, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -5660,7 +5649,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, } } @@ -5847,7 +5835,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; assert!(!dockerfile_sources_supported_for_gateway(Some(&metadata))); @@ -5866,13 +5853,20 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; assert!(dockerfile_sources_supported_for_gateway(Some(&metadata))); assert!(dockerfile_sources_supported_for_gateway(None)); } + #[test] + fn vm_local_image_ref_wraps_docker_image_ref() { + assert_eq!( + vm_local_image_ref("openshell/sandbox-from:123"), + "openshell-vm-local-image:openshell/sandbox-from:123" + ); + } + #[test] fn ready_false_condition_message_prefers_reason_and_message() { let status = SandboxStatus { @@ -6089,7 +6083,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }, ]; @@ -6126,7 +6119,6 @@ mod tests { edge_team_domain: None, edge_auth_url: None, vm_driver_state_dir: None, - vm_rootfs_artifact_secret: None, }; assert_eq!(gateway_auth_label(&gateway), "mtls"); diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index e69d06f4f..687ee87b2 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -735,6 +735,10 @@ async fn 
sandbox_create_keeps_sandbox_with_forwarding() { let _env = test_env(&fake_ssh_dir, &xdg_dir); let tls = test_tls(&server); install_fake_ssh(&fake_ssh_dir); + let forward_port = { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + listener.local_addr().unwrap().port() + }; run::sandbox_create( &server.endpoint, @@ -750,7 +754,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { None, &[], None, - Some(openshell_core::forward::ForwardSpec::new(8080)), + Some(openshell_core::forward::ForwardSpec::new(forward_port)), &["echo".to_string(), "OK".to_string()], Some(false), Some(false), diff --git a/crates/openshell-driver-vm/Cargo.toml b/crates/openshell-driver-vm/Cargo.toml index 97c15d261..c13d904a6 100644 --- a/crates/openshell-driver-vm/Cargo.toml +++ b/crates/openshell-driver-vm/Cargo.toml @@ -21,8 +21,8 @@ path = "src/main.rs" [dependencies] openshell-core = { path = "../openshell-core" } openshell-vfio = { path = "../openshell-vfio" } -openshell-bootstrap = { path = "../openshell-bootstrap" } +bollard = { version = "0.20", features = ["ssh"] } tokio = { workspace = true } tonic = { workspace = true, features = ["transport"] } prost-types = { workspace = true } diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index d42dea442..49807332e 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -164,7 +164,9 @@ The VM guest's serial console is appended to `//console.l - Matching rustup target: `rustup target add aarch64-unknown-linux-gnu` (or `x86_64-unknown-linux-gnu` for an amd64 guest) - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `vm:supervisor` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary. 
- [mise](https://mise.jdx.dev/) task runner -- Docker-compatible socket on the CLI host when using `openshell sandbox create --from ./Dockerfile` or `--from ./dir` +- Docker-compatible socket on the local CLI/gateway host when using + `openshell sandbox create --from ./Dockerfile` or `--from ./dir`; the CLI + builds the image and the VM driver exports it via the local Docker daemon - `gh` CLI (used by `mise run vm:setup` to download pre-built runtime artifacts) ## Relationship to `openshell-vm` diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 0270c5a5d..9dbd59808 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -8,8 +8,11 @@ use crate::rootfs::{ create_rootfs_archive_from_dir, extract_rootfs_archive_to, prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path, }; +use bollard::Docker; +use bollard::models::ContainerCreateBody; +use bollard::query_parameters::{CreateContainerOptionsBuilder, RemoveContainerOptionsBuilder}; use flate2::read::GzDecoder; -use futures::Stream; +use futures::{Stream, StreamExt}; use nix::errno::Errno; use nix::sys::signal::{Signal, kill}; use nix::unistd::Pid; @@ -17,9 +20,6 @@ use oci_client::client::{Client as OciClient, ClientConfig}; use oci_client::manifest::{ImageIndexEntry, OciDescriptor}; use oci_client::secrets::RegistryAuth; use oci_client::{Reference, RegistryOperation}; -use openshell_bootstrap::build::{ - ROOTFS_TAR_IMAGE_REF_SCHEME, decode_authenticated_rootfs_tar_image_ref, -}; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, @@ -89,8 +89,10 @@ const GUEST_TLS_CERT_PATH: &str = "/opt/openshell/tls/tls.crt"; const GUEST_TLS_KEY_PATH: &str = "/opt/openshell/tls/tls.key"; const IMAGE_CACHE_ROOT_DIR: &str = "images"; const IMAGE_CACHE_ROOTFS_ARCHIVE: &str = 
"rootfs.tar"; +const IMAGE_EXPORT_ROOTFS_ARCHIVE: &str = "source-rootfs.tar"; const IMAGE_IDENTITY_FILE: &str = "image-identity"; const IMAGE_REFERENCE_FILE: &str = "image-reference"; +const VM_LOCAL_IMAGE_REF_PREFIX: &str = "openshell-vm-local-image:"; static IMAGE_CACHE_BUILD_COUNTER: AtomicU64 = AtomicU64::new(0); #[derive(Debug, Clone)] @@ -118,7 +120,6 @@ pub struct VmDriverConfig { pub gpu_enabled: bool, pub gpu_mem_mib: u32, pub gpu_vcpus: u8, - pub rootfs_artifact_secret: Option, } impl Default for VmDriverConfig { @@ -140,7 +141,6 @@ impl Default for VmDriverConfig { gpu_enabled: false, gpu_mem_mib: 8192, gpu_vcpus: 4, - rootfs_artifact_secret: None, } } } @@ -760,32 +760,9 @@ impl VmDriver { sandbox_id: &str, image_ref: &str, ) -> Result { - if is_rootfs_tar_image_ref(image_ref) { - let secret = self - .config - .rootfs_artifact_secret - .as_deref() - .filter(|secret| !secret.trim().is_empty()) - .ok_or_else(|| { - Status::failed_precondition( - "vm rootfs tar image references require a gateway-issued artifact secret", - ) - })?; - let artifact = decode_authenticated_rootfs_tar_image_ref(image_ref, secret) - .map_err(|err| { - Status::failed_precondition(format!( - "invalid vm rootfs artifact reference: {err}" - )) - })? - .ok_or_else(|| { - Status::failed_precondition("invalid vm rootfs artifact reference") - })?; + if let Some(local_image_ref) = parse_vm_local_image_ref(image_ref)? 
{ return self - .ensure_cached_rootfs_tar_image_rootfs_archive( - image_ref, - &artifact.path, - &artifact.digest, - ) + .ensure_cached_local_image_rootfs_archive(sandbox_id, image_ref, local_image_ref) .await; } @@ -876,58 +853,60 @@ impl VmDriver { Ok(image_identity) } - async fn ensure_cached_rootfs_tar_image_rootfs_archive( + async fn ensure_cached_local_image_rootfs_archive( &self, + sandbox_id: &str, image_ref: &str, - rootfs_tar_path: &Path, - expected_digest: &str, + local_image_ref: &str, ) -> Result { - let rootfs_tar = rootfs_tar_path.to_path_buf(); - let image_identity = tokio::task::spawn_blocking(move || compute_file_sha256(&rootfs_tar)) - .await - .map_err(|err| { - Status::internal(format!("rootfs tar digest computation panicked: {err}")) - })? - .map_err(|err| { - Status::failed_precondition(format!( - "failed to fingerprint vm sandbox rootfs artifact '{}': {err}", - rootfs_tar_path.display() - )) - })?; - if image_identity != expected_digest { - return Err(Status::failed_precondition( - "vm rootfs artifact digest does not match the authenticated reference", - )); - } + let docker = Docker::connect_with_local_defaults().map_err(|err| { + Status::failed_precondition(format!( + "failed to connect to local Docker daemon for vm local image '{image_ref}': {err}" + )) + })?; + let image_identity = local_docker_image_identity(&docker, local_image_ref).await?; let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + self.publish_platform_event( + sandbox_id.to_string(), + platform_event( + "vm", + "Normal", + "Pulling", + format!("Pulling image \"{local_image_ref}\""), + ), + ); + if tokio::fs::metadata(&archive_path).await.is_ok() { + self.publish_pulled_event(sandbox_id, local_image_ref, &archive_path) + .await; return Ok(image_identity); } let _cache_guard = self.image_cache_lock.lock().await; if tokio::fs::metadata(&archive_path).await.is_ok() { + self.publish_pulled_event(sandbox_id, local_image_ref, &archive_path) + 
.await; return Ok(image_identity); } - self.build_cached_rootfs_tar_image_rootfs_archive( - image_ref, - rootfs_tar_path, - &image_identity, - ) - .await?; + self.build_cached_local_image_rootfs_archive(&docker, local_image_ref, &image_identity) + .await?; + self.publish_pulled_event(sandbox_id, local_image_ref, &archive_path) + .await; Ok(image_identity) } - async fn build_cached_rootfs_tar_image_rootfs_archive( + async fn build_cached_local_image_rootfs_archive( &self, + docker: &Docker, image_ref: &str, - rootfs_tar_path: &Path, image_identity: &str, ) -> Result<(), Status> { let cache_dir = image_cache_dir(&self.config.state_dir, image_identity); let archive_path = image_cache_rootfs_archive(&self.config.state_dir, image_identity); let staging_dir = image_cache_staging_dir(&self.config.state_dir, image_identity); + let exported_rootfs = staging_dir.join(IMAGE_EXPORT_ROOTFS_ARCHIVE); let prepared_rootfs = staging_dir.join("rootfs"); let prepared_archive = staging_dir.join(IMAGE_CACHE_ROOTFS_ARCHIVE); @@ -953,24 +932,29 @@ impl VmDriver { Status::internal(format!("create image cache staging dir failed: {err}")) })?; + if let Err(err) = + export_local_image_rootfs_to_path(docker, image_ref, &exported_rootfs).await + { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(err); + } + let image_ref_owned = image_ref.to_string(); let image_identity_owned = image_identity.to_string(); - let rootfs_tar_path_owned = rootfs_tar_path.to_path_buf(); + let exported_rootfs_for_build = exported_rootfs.clone(); let prepared_rootfs_for_build = prepared_rootfs.clone(); let prepared_archive_for_build = prepared_archive.clone(); let build_result = tokio::task::spawn_blocking(move || { - extract_rootfs_archive_to(&rootfs_tar_path_owned, &prepared_rootfs_for_build)?; - prepare_sandbox_rootfs_from_image_root( - &prepared_rootfs_for_build, + prepare_exported_rootfs_archive( + &image_ref_owned, &image_identity_owned, + &exported_rootfs_for_build, + 
&prepared_rootfs_for_build, + &prepared_archive_for_build, ) - .map_err(|err| { - format!("vm sandbox image '{image_ref_owned}' is not base-compatible: {err}") - })?; - create_rootfs_archive_from_dir(&prepared_rootfs_for_build, &prepared_archive_for_build) }) .await - .map_err(|err| Status::internal(format!("rootfs artifact preparation panicked: {err}")))?; + .map_err(|err| Status::internal(format!("local image preparation panicked: {err}")))?; if let Err(err) = build_result { let _ = tokio::fs::remove_dir_all(&staging_dir).await; @@ -1436,10 +1420,122 @@ fn parse_registry_reference(image_ref: &str) -> Result { }) } -fn is_rootfs_tar_image_ref(image_ref: &str) -> bool { - image_ref - .strip_prefix(ROOTFS_TAR_IMAGE_REF_SCHEME) - .is_some_and(|rest| rest.starts_with(':')) +#[allow(clippy::result_large_err)] +fn parse_vm_local_image_ref(image_ref: &str) -> Result, Status> { + let Some(local_image_ref) = image_ref.strip_prefix(VM_LOCAL_IMAGE_REF_PREFIX) else { + return Ok(None); + }; + if local_image_ref.trim().is_empty() { + return Err(Status::failed_precondition( + "invalid vm local image reference: missing Docker image reference", + )); + } + Ok(Some(local_image_ref)) +} + +async fn local_docker_image_identity(docker: &Docker, image_ref: &str) -> Result { + let inspect = docker.inspect_image(image_ref).await.map_err(|err| { + Status::failed_precondition(format!( + "failed to inspect local Docker image '{image_ref}': {err}" + )) + })?; + inspect + .id + .filter(|id| !id.trim().is_empty()) + .ok_or_else(|| { + Status::failed_precondition(format!( + "local Docker image '{image_ref}' inspect response has no image ID" + )) + }) +} + +async fn export_local_image_rootfs_to_path( + docker: &Docker, + image_ref: &str, + tar_path: &Path, +) -> Result<(), Status> { + let container_name = format!( + "openshell-vm-rootfs-export-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() + ); + let create_options = 
CreateContainerOptionsBuilder::default() + .name(container_name.as_str()) + .build(); + let container = docker + .create_container( + Some(create_options), + ContainerCreateBody { + image: Some(image_ref.to_string()), + ..Default::default() + }, + ) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to create temporary export container for local Docker image '{image_ref}': {err}" + )) + })?; + let container_id = container.id; + + let export_result = async { + if let Some(parent) = tar_path.parent() { + tokio::fs::create_dir_all(parent).await.map_err(|err| { + Status::internal(format!( + "create export dir {} failed: {err}", + parent.display() + )) + })?; + } + let mut file = tokio::fs::File::create(tar_path).await.map_err(|err| { + Status::internal(format!("create {} failed: {err}", tar_path.display())) + })?; + let mut stream = docker.export_container(&container_id); + while let Some(chunk) = stream.next().await { + let chunk = chunk.map_err(|err| { + Status::failed_precondition(format!( + "failed to export local Docker image '{image_ref}': {err}" + )) + })?; + file.write_all(&chunk).await.map_err(|err| { + Status::internal(format!("write {} failed: {err}", tar_path.display())) + })?; + } + file.flush() + .await + .map_err(|err| Status::internal(format!("flush {} failed: {err}", tar_path.display()))) + } + .await; + + let cleanup_result = docker + .remove_container( + &container_id, + Some(RemoveContainerOptionsBuilder::default().force(true).build()), + ) + .await; + + match (export_result, cleanup_result) { + (Ok(()), Ok(())) => Ok(()), + (Err(err), _) => Err(err), + (Ok(()), Err(err)) => Err(Status::internal(format!( + "failed to remove temporary export container for local Docker image '{image_ref}': {err}" + ))), + } +} + +fn prepare_exported_rootfs_archive( + image_ref: &str, + image_identity: &str, + exported_rootfs: &Path, + prepared_rootfs: &Path, + prepared_archive: &Path, +) -> Result<(), String> { + 
extract_rootfs_archive_to(exported_rootfs, prepared_rootfs)?; + prepare_sandbox_rootfs_from_image_root(prepared_rootfs, image_identity) + .map_err(|err| format!("vm sandbox image '{image_ref}' is not base-compatible: {err}"))?; + create_rootfs_archive_from_dir(prepared_rootfs, prepared_archive) } fn registry_client() -> OciClient { @@ -1700,10 +1796,6 @@ fn verify_descriptor_digest(path: &Path, expected_digest: &str) -> Result<(), St } } -fn compute_file_sha256(path: &Path) -> Result { - compute_file_sha256_hex(path).map(|digest| format!("sha256:{digest}")) -} - fn compute_file_sha256_hex(path: &Path) -> Result { let mut file = fs::File::open(path).map_err(|err| format!("open {}: {err}", path.display()))?; let mut hasher = Sha256::new(); @@ -2585,29 +2677,28 @@ mod tests { ); } - #[tokio::test] - async fn ensure_cached_image_rootfs_archive_rejects_unsigned_rootfs_tar_refs() { - let base = unique_temp_dir(); - fs::create_dir_all(&base).unwrap(); - let artifact = base.join("rootfs.tar"); - fs::write(&artifact, "not a real rootfs").unwrap(); - let image_ref = openshell_bootstrap::build::encode_rootfs_tar_image_ref(&artifact).unwrap(); - let driver = test_driver(VmDriverConfig { - state_dir: base.join("driver-state"), - ..Default::default() - }); - - let err = driver - .ensure_cached_image_rootfs_archive("sandbox-123", &image_ref) - .await - .expect_err("unsigned rootfs tar refs must be rejected"); - - assert_eq!(err.code(), Code::FailedPrecondition); - assert!( - err.message().contains("gateway-issued artifact secret"), - "unexpected error: {}", - err.message() + #[test] + fn parse_vm_local_image_ref_classifies_prefixed_refs() { + assert_eq!( + parse_vm_local_image_ref("openshell-vm-local-image:openshell/sandbox-from:123") + .unwrap(), + Some("openshell/sandbox-from:123") ); + assert_eq!(parse_vm_local_image_ref("ubuntu:24.04").unwrap(), None); + } + + #[test] + fn parse_vm_local_image_ref_rejects_empty_refs() { + for image_ref in ["openshell-vm-local-image:", 
"openshell-vm-local-image: "] { + let err = + parse_vm_local_image_ref(image_ref).expect_err("empty local image refs must fail"); + assert_eq!(err.code(), Code::FailedPrecondition); + assert!( + err.message().contains("missing Docker image reference"), + "unexpected error: {}", + err.message() + ); + } } #[test] @@ -2778,15 +2869,56 @@ mod tests { } #[test] - fn compute_file_sha256_returns_prefixed_digest() { + fn prepare_exported_rootfs_archive_rewrites_docker_exported_rootfs() { let base = unique_temp_dir(); - fs::create_dir_all(&base).unwrap(); - let file = base.join("rootfs.tar"); - fs::write(&file, b"openshell").unwrap(); + let source_rootfs = base.join("source-rootfs"); + let exported_rootfs = base.join("exported-rootfs.tar"); + let prepared_rootfs = base.join("prepared-rootfs"); + let prepared_archive = base.join("prepared-rootfs.tar"); + let extracted = base.join("extracted"); + + for path in [ + "bin/bash", + "bin/mount", + "bin/sed", + "sbin/ip", + "opt/openshell/bin/openshell-sandbox", + "usr/local/bin/k3s", + ] { + let path = source_rootfs.join(path); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + fs::write(path, "").unwrap(); + } + fs::create_dir_all(source_rootfs.join("opt/openshell/manifests")).unwrap(); + fs::write(source_rootfs.join("opt/openshell/manifests/old.yaml"), "").unwrap(); + + create_rootfs_archive_from_dir(&source_rootfs, &exported_rootfs).unwrap(); + prepare_exported_rootfs_archive( + "openshell/sandbox-from:123", + "sha256:local-image", + &exported_rootfs, + &prepared_rootfs, + &prepared_archive, + ) + .unwrap(); + extract_rootfs_archive_to(&prepared_archive, &extracted).unwrap(); + assert!(extracted.join("srv/openshell-vm-sandbox-init.sh").is_file()); + assert!( + extracted + .join("opt/openshell/bin/openshell-sandbox") + .is_file() + ); + assert!(!extracted.join("usr/local/bin/k3s").exists()); + assert!(!extracted.join("opt/openshell/manifests").exists()); assert_eq!( - compute_file_sha256(&file).unwrap(), - 
"sha256:dc5cbc21a452a783ec453e8a8603101dfec5c7d6a19b6c645889bec8b97c2390" + fs::read_to_string(extracted.join("opt/openshell/.rootfs-type")).unwrap(), + "sandbox\n" + ); + assert!( + fs::read_to_string(extracted.join(".openshell-rootfs-variant")) + .unwrap() + .contains("sha256:local-image") ); let _ = fs::remove_dir_all(base); @@ -2899,21 +3031,6 @@ mod tests { )) } - fn test_driver(config: VmDriverConfig) -> VmDriver { - VmDriver { - config, - launcher_bin: PathBuf::from("openshell-driver-vm"), - registry: Arc::new(Mutex::new(HashMap::new())), - image_cache_lock: Arc::new(Mutex::new(())), - events: broadcast::channel(WATCH_BUFFER).0, - gpu_inventory: None, - subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( - Ipv4Addr::new(10, 0, 128, 0), - 17, - ))), - } - } - fn spawn_exited_child() -> Child { Command::new("sh") .arg("-c") diff --git a/crates/openshell-driver-vm/src/main.rs b/crates/openshell-driver-vm/src/main.rs index 3f5d70c14..596e6c88d 100644 --- a/crates/openshell-driver-vm/src/main.rs +++ b/crates/openshell-driver-vm/src/main.rs @@ -105,9 +105,6 @@ struct Args { #[arg(long, env = "OPENSHELL_VM_GPU_VCPUS", default_value_t = 4)] gpu_vcpus: u8, - #[arg(long, env = "OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET", hide = true)] - rootfs_artifact_secret: Option, - #[arg(long, hide = true)] vm_backend: Option, @@ -188,7 +185,6 @@ async fn main() -> Result<()> { gpu_enabled: args.gpu, gpu_mem_mib: args.gpu_mem_mib, gpu_vcpus: args.gpu_vcpus, - rootfs_artifact_secret: args.rootfs_artifact_secret, }) .await .map_err(|err| miette::miette!("{err}"))?; diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index b4d8081c1..ff01c0900 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -1733,6 +1733,7 @@ fn bootstrap_gateway(rootfs: &Path, gateway_name: &str, gateway_port: u16) -> Re auth_mode: None, edge_team_domain: None, edge_auth_url: None, + vm_driver_state_dir: None, }; let exec_socket = 
vm_exec_socket_path(rootfs); diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index fb24bae9b..da7c2e7f4 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -51,6 +51,10 @@ openshell sandbox create --from my-registry.example.com/my-image:latest The CLI resolves community names against the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) catalog, pulls the bundled Dockerfile and policy, builds the image locally, and creates the sandbox. For the full catalog and how to contribute your own, refer to [Community Sandboxes](/sandboxes/community-sandboxes). +Local directories and Dockerfiles require a local gateway because the CLI builds +through the local Docker daemon. Use a registry image reference for remote +gateways. + ### Label a Sandbox Attach labels when you create a sandbox to track ownership, environment, or workflow grouping: diff --git a/tasks/scripts/gateway-vm.sh b/tasks/scripts/gateway-vm.sh index e67f61347..cdeaa7931 100755 --- a/tasks/scripts/gateway-vm.sh +++ b/tasks/scripts/gateway-vm.sh @@ -85,7 +85,6 @@ register_gateway_metadata() { local endpoint=$2 local port=$3 local vm_driver_state_dir=$4 - local rootfs_artifact_secret=$5 local config_home gateway_dir config_home="${XDG_CONFIG_HOME:-${HOME}/.config}" @@ -100,8 +99,7 @@ register_gateway_metadata() { "is_remote": false, "gateway_port": ${port}, "auth_mode": "plaintext", - "vm_driver_state_dir": "${vm_driver_state_dir}", - "vm_rootfs_artifact_secret": "${rootfs_artifact_secret}" + "vm_driver_state_dir": "${vm_driver_state_dir}" } EOF chmod 600 "${gateway_dir}/metadata.json" 2>/dev/null || true @@ -152,10 +150,6 @@ check_supervisor_cross_toolchain() { fi } -generate_rootfs_artifact_secret() { - od -An -N32 -tx1 /dev/urandom | tr -d ' \n' -} - VM_GPU="$(normalize_bool "${OPENSHELL_VM_GPU:-false}")" while [ "$#" -gt 0 ]; do @@ -219,12 +213,6 @@ 
VM_DRIVER_STATE_DIR_DEFAULT="${OPENSHELL_VM_DRIVER_STATE_ROOT:-/tmp}/openshell-v VM_DRIVER_STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${VM_DRIVER_STATE_DIR_DEFAULT}}" DISABLE_TLS="$(normalize_bool "${OPENSHELL_DISABLE_TLS:-true}")" -ROOTFS_ARTIFACT_SECRET="${OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET:-$(generate_rootfs_artifact_secret)}" -if [[ ! "${ROOTFS_ARTIFACT_SECRET}" =~ ^[A-Za-z0-9._~=-]+$ ]]; then - echo "ERROR: OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET must contain only URL-safe characters" >&2 - exit 2 -fi -export OPENSHELL_VM_ROOTFS_ARTIFACT_SECRET="${ROOTFS_ARTIFACT_SECRET}" # Build prerequisites: VM runtime artifacts + bundled supervisor. if [ ! -d "${COMPRESSED_DIR}" ] \ @@ -264,7 +252,7 @@ mkdir -p "${STATE_DIR}" mkdir -p "${VM_DRIVER_STATE_DIR}" GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" -register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" "${VM_DRIVER_STATE_DIR}" "${ROOTFS_ARTIFACT_SECRET}" +register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" "${VM_DRIVER_STATE_DIR}" save_active_gateway "${GATEWAY_NAME}" echo "Starting standalone VM gateway..." From 91648e5bbb39a1781c8ff707a31588503b582e9d Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 29 Apr 2026 21:06:42 -0700 Subject: [PATCH 09/11] wip --- architecture/custom-vm-runtime.md | 10 +- architecture/sandbox-custom-containers.md | 4 +- crates/openshell-cli/src/run.rs | 138 +++++++++++++----- crates/openshell-driver-vm/src/driver.rs | 162 +++++++++++++--------- scripts/bin/openshell | 12 +- 5 files changed, 218 insertions(+), 108 deletions(-) diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index 55f9a8b1f..ad677d3ae 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -78,9 +78,9 @@ Old runtime cache versions are cleaned up when a new version is extracted. 
Each VM sandbox starts from either a registry image fetched directly over OCI or a local Docker image reference produced by Dockerfile-based `--from` sources. For local Dockerfile sources, the CLI builds the image on the local Docker -daemon and passes the VM driver an internal `openshell-vm-local-image:` -reference. The driver resolves that tag on the gateway host, exports the image -filesystem, and **rewrites that filesystem into a supervisor-only sandbox +daemon and passes the ordinary image tag through `template.image`. The VM driver +first checks the local Docker daemon for that tag; when present, it exports the +image filesystem and **rewrites that filesystem into a supervisor-only sandbox guest** before caching it: - `/srv/openshell-vm-sandbox-init.sh` is installed as the guest entrypoint @@ -111,7 +111,7 @@ sandbox image. The driver: - resolves the image on the gateway host without Docker for registry and community image refs - for local Dockerfile sources, the CLI builds through the host Docker socket - and hands the VM driver an internal local-image ref instead of a registry ref + and passes the resulting ordinary Docker tag through `template.image` - unpacks the image filesystem, injects the VM sandbox init/supervisor files, and validates required guest tools such as `bash`, `mount`, `ip`, and `sed` - caches the prepared guest rootfs under @@ -120,7 +120,7 @@ sandbox image. The driver: `/sandboxes//rootfs` The cache key uses an immutable image identity: repo digest for registry images -and the local Docker image ID for local-image refs. +and the local Docker image ID for images resolved from the local daemon. Different VM sandboxes can use different base images concurrently because the shared cache is per image, not global for the driver. 
Cached prepared rootfs entries remain on disk until the operator removes them from the VM driver state diff --git a/architecture/sandbox-custom-containers.md b/architecture/sandbox-custom-containers.md index cb1546db2..13a568e15 100644 --- a/architecture/sandbox-custom-containers.md +++ b/architecture/sandbox-custom-containers.md @@ -34,8 +34,8 @@ When `--from` points to a Dockerfile or directory, the CLI: 1. Builds the image locally via the Docker daemon (respecting `.dockerignore`). 2. For a local Kubernetes gateway, pushes it into the cluster's containerd runtime using `docker save` / `ctr import`. -3. For a local VM gateway, passes the VM driver an internal `openshell-vm-local-image:` reference. The driver resolves that tag against the local Docker daemon, exports the image filesystem, and prepares the VM rootfs in its own cache. -4. Creates the sandbox with the resulting image tag or VM-local image reference. +3. For standalone local Docker and VM gateways, passes the ordinary image tag through. The Docker driver runs that tag directly; the VM driver resolves it from the local Docker daemon, exports the image filesystem, and prepares the VM rootfs in its own cache. +4. Creates the sandbox with the resulting image tag. Local Dockerfile sources for VM gateways are trusted local-development inputs. Remote gateways continue to reject local Dockerfile sources because the gateway diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index ee8ea0c36..5a8ec1840 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -2569,6 +2569,7 @@ pub async fn sandbox_create( } /// Resolved source for the `--from` flag on `sandbox create`. +#[derive(Debug)] enum ResolvedSource { /// A ready-to-use container image reference. Image(String), @@ -2585,19 +2586,15 @@ enum ResolvedSource { /// Resolution order: /// 1. Existing file whose name contains "Dockerfile" → build from file. /// 2. 
Existing directory that contains a `Dockerfile` → build from directory. -/// 3. Value contains `/`, `:`, or `.` → treat as a full image reference. -/// 4. Otherwise → community sandbox name, expanded via the registry prefix. +/// 3. Missing explicit local paths → local error, not image pull. +/// 4. Value contains `/`, `:`, or `.` → treat as a full image reference. +/// 5. Otherwise → community sandbox name, expanded via the registry prefix. fn resolve_from(value: &str) -> Result { let path = Path::new(value); // 1. Existing file that looks like a Dockerfile. if path.is_file() { - let name = path - .file_name() - .map(|n| n.to_string_lossy()) - .unwrap_or_default(); - let lower = name.to_lowercase(); - if lower.contains("dockerfile") || lower.ends_with(".dockerfile") { + if filename_looks_like_dockerfile(path) { let dockerfile = path .canonicalize() .into_diagnostic() @@ -2611,6 +2608,13 @@ fn resolve_from(value: &str) -> Result { context, }); } + + if value_looks_like_local_source(value) { + return Err(miette::miette!( + "local --from file is not a Dockerfile: {}", + path.display() + )); + } } // 2. Existing directory containing a Dockerfile. @@ -2633,13 +2637,50 @@ fn resolve_from(value: &str) -> Result { )); } - // 3. Full image reference or community sandbox name — delegate to shared + if path.exists() { + return Err(miette::miette!( + "local --from path is not a regular file or directory: {}", + path.display() + )); + } + + // 3. Missing explicit local paths should fail locally. Otherwise values + // like `./Dockerfile` reach the gateway as image references and fail as + // Docker pull errors. + if value_looks_like_local_source(value) { + return Err(miette::miette!( + "local --from path does not exist: {}\n\ + Use an existing Dockerfile, a directory containing Dockerfile, or a container image reference.", + path.display() + )); + } + + // 4. Full image reference or community sandbox name — delegate to shared // resolution in openshell-core. 
Ok(ResolvedSource::Image( openshell_core::image::resolve_community_image(value), )) } +fn filename_looks_like_dockerfile(path: &Path) -> bool { + let name = path + .file_name() + .map(|n| n.to_string_lossy()) + .unwrap_or_default(); + let lower = name.to_lowercase(); + lower.contains("dockerfile") || lower.ends_with(".dockerfile") +} + +fn value_looks_like_local_source(value: &str) -> bool { + let path = Path::new(value); + path.is_absolute() + || matches!(value, "." | "..") + || value.starts_with("./") + || value.starts_with("../") + || value.starts_with("~/") + || filename_looks_like_dockerfile(path) +} + fn source_requests_gpu(source: &str) -> bool { resolve_from(source).is_ok_and(|resolved| match resolved { ResolvedSource::Image(image) => image_requests_gpu(&image), @@ -2660,12 +2701,6 @@ fn image_requests_gpu(image: &str) -> bool { image_name.contains("gpu") } -const VM_LOCAL_IMAGE_REF_SCHEME: &str = "openshell-vm-local-image"; - -fn vm_local_image_ref(image_ref: &str) -> String { - format!("{VM_LOCAL_IMAGE_REF_SCHEME}:{image_ref}") -} - fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>) -> bool { !metadata.is_some_and(|metadata| metadata.is_remote) } @@ -2673,10 +2708,9 @@ fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>) /// Build a Dockerfile and make the resulting image available to the gateway. /// /// For local Kubernetes gateways running in Docker, this imports the built image -/// into the gateway runtime and returns the Docker tag. For local VM gateways, -/// this returns an internal local-image URI. The VM driver resolves that URI -/// against the local Docker daemon and prepares the VM rootfs on the gateway -/// host. +/// into the gateway runtime and returns the Docker tag. Standalone local +/// gateways use the same Docker daemon that the CLI built into, so the tag is +/// passed through directly and the active compute driver resolves it. 
async fn build_from_dockerfile( dockerfile: &Path, context: &Path, @@ -2736,18 +2770,16 @@ async fn build_from_dockerfile( return Ok(tag); } - let local_image_ref = vm_local_image_ref(&tag); - eprintln!(); eprintln!( - "{} Image {} will be resolved by the local VM driver for gateway '{}'.", + "{} Image {} is available in the local Docker daemon for gateway '{}'.", "✓".green().bold(), tag.cyan(), gateway_name, ); eprintln!(); - Ok(local_image_ref) + Ok(tag) } /// Load sandbox policy YAML. @@ -5569,9 +5601,9 @@ mod tests { gateway_select_with, gateway_type_label, git_sync_files, http_health_check, image_requests_gpu, inferred_provider_type, parse_cli_setting_value, parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message, - ready_false_condition_message, resolve_gateway_control_target_from, sandbox_should_persist, - shell_escape, source_requests_gpu, validate_gateway_name, validate_ssh_host, - vm_local_image_ref, + ready_false_condition_message, resolve_from, resolve_gateway_control_target_from, + sandbox_should_persist, shell_escape, source_requests_gpu, validate_gateway_name, + validate_ssh_host, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -5822,6 +5854,52 @@ mod tests { assert!(!source_requests_gpu("base")); } + #[test] + fn resolve_from_classifies_existing_dockerfile_path() { + let temp = tempfile::tempdir().expect("failed to create tempdir"); + let dockerfile = temp.path().join("Dockerfile"); + fs::write(&dockerfile, "FROM scratch\n").expect("failed to write Dockerfile"); + + match resolve_from(dockerfile.to_str().expect("temp path is not UTF-8")) + .expect("expected Dockerfile source") + { + super::ResolvedSource::Dockerfile { + dockerfile: resolved, + context, + } => { + assert_eq!( + resolved, + dockerfile + .canonicalize() + .expect("failed to canonicalize Dockerfile") + ); + assert_eq!( + context, + temp.path() + .canonicalize() + .expect("failed to canonicalize context") + ); + } + super::ResolvedSource::Image(image) => 
{ + panic!("expected Dockerfile source, got image {image}"); + } + } + } + + #[test] + fn resolve_from_rejects_missing_explicit_dockerfile_path() { + let temp = tempfile::tempdir().expect("failed to create tempdir"); + let missing = temp.path().join("Dockerfile"); + + let err = resolve_from(missing.to_str().expect("temp path is not UTF-8")) + .expect_err("expected missing Dockerfile path to be rejected"); + + assert!( + err.to_string().contains("local --from path does not exist"), + "unexpected error: {err}" + ); + } + #[test] fn dockerfile_sources_are_rejected_for_remote_gateways() { let metadata = GatewayMetadata { @@ -5859,14 +5937,6 @@ mod tests { assert!(dockerfile_sources_supported_for_gateway(None)); } - #[test] - fn vm_local_image_ref_wraps_docker_image_ref() { - assert_eq!( - vm_local_image_ref("openshell/sandbox-from:123"), - "openshell-vm-local-image:openshell/sandbox-from:123" - ); - } - #[test] fn ready_false_condition_message_prefers_reason_and_message() { let status = SandboxStatus { diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 9dbd59808..e21ce8849 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -9,6 +9,7 @@ use crate::rootfs::{ prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path, }; use bollard::Docker; +use bollard::errors::Error as BollardError; use bollard::models::ContainerCreateBody; use bollard::query_parameters::{CreateContainerOptionsBuilder, RemoveContainerOptionsBuilder}; use flate2::read::GzDecoder; @@ -92,7 +93,6 @@ const IMAGE_CACHE_ROOTFS_ARCHIVE: &str = "rootfs.tar"; const IMAGE_EXPORT_ROOTFS_ARCHIVE: &str = "source-rootfs.tar"; const IMAGE_IDENTITY_FILE: &str = "image-identity"; const IMAGE_REFERENCE_FILE: &str = "image-reference"; -const VM_LOCAL_IMAGE_REF_PREFIX: &str = "openshell-vm-local-image:"; static IMAGE_CACHE_BUILD_COUNTER: AtomicU64 = AtomicU64::new(0); #[derive(Debug, Clone)] @@ -760,9 +760,14 @@ 
impl VmDriver { sandbox_id: &str, image_ref: &str, ) -> Result { - if let Some(local_image_ref) = parse_vm_local_image_ref(image_ref)? { + if let Some((docker, image_identity)) = self.resolve_local_docker_image(image_ref).await? { return self - .ensure_cached_local_image_rootfs_archive(sandbox_id, image_ref, local_image_ref) + .ensure_cached_local_image_rootfs_archive( + sandbox_id, + image_ref, + &docker, + &image_identity, + ) .await; } @@ -853,19 +858,74 @@ impl VmDriver { Ok(image_identity) } + async fn resolve_local_docker_image( + &self, + image_ref: &str, + ) -> Result, Status> { + let required_local_image = is_openshell_local_build_image_ref(image_ref); + let docker = match Docker::connect_with_local_defaults() { + Ok(docker) => docker, + Err(err) if required_local_image => { + return Err(Status::failed_precondition(format!( + "failed to connect to local Docker daemon for locally built sandbox image '{image_ref}': {err}" + ))); + } + Err(err) => { + warn!( + image_ref = %image_ref, + error = %err, + "vm driver: local Docker daemon unavailable, falling back to registry" + ); + return Ok(None); + } + }; + + match docker.inspect_image(image_ref).await { + Ok(inspect) => { + let image_identity = + inspect + .id + .filter(|id| !id.trim().is_empty()) + .ok_or_else(|| { + Status::failed_precondition(format!( + "local Docker image '{image_ref}' inspect response has no image ID" + )) + })?; + info!( + image_ref = %image_ref, + image_identity = %image_identity, + "vm driver: resolved image from local Docker daemon" + ); + Ok(Some((docker, image_identity))) + } + Err(err) if is_docker_not_found_error(&err) && required_local_image => { + Err(Status::failed_precondition(format!( + "locally built sandbox image '{image_ref}' is not present in the local Docker daemon" + ))) + } + Err(err) if is_docker_not_found_error(&err) => Ok(None), + Err(err) if required_local_image => Err(Status::failed_precondition(format!( + "failed to inspect locally built sandbox image 
'{image_ref}': {err}" + ))), + Err(err) => { + warn!( + image_ref = %image_ref, + error = %err, + "vm driver: local Docker image inspection failed, falling back to registry" + ); + Ok(None) + } + } + } + async fn ensure_cached_local_image_rootfs_archive( &self, sandbox_id: &str, image_ref: &str, - local_image_ref: &str, + docker: &Docker, + image_identity: &str, ) -> Result { - let docker = Docker::connect_with_local_defaults().map_err(|err| { - Status::failed_precondition(format!( - "failed to connect to local Docker daemon for vm local image '{image_ref}': {err}" - )) - })?; - let image_identity = local_docker_image_identity(&docker, local_image_ref).await?; - let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, image_identity); self.publish_platform_event( sandbox_id.to_string(), @@ -873,28 +933,28 @@ impl VmDriver { "vm", "Normal", "Pulling", - format!("Pulling image \"{local_image_ref}\""), + format!("Pulling image \"{image_ref}\""), ), ); if tokio::fs::metadata(&archive_path).await.is_ok() { - self.publish_pulled_event(sandbox_id, local_image_ref, &archive_path) + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) .await; - return Ok(image_identity); + return Ok(image_identity.to_string()); } let _cache_guard = self.image_cache_lock.lock().await; if tokio::fs::metadata(&archive_path).await.is_ok() { - self.publish_pulled_event(sandbox_id, local_image_ref, &archive_path) + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) .await; - return Ok(image_identity); + return Ok(image_identity.to_string()); } - self.build_cached_local_image_rootfs_archive(&docker, local_image_ref, &image_identity) + self.build_cached_local_image_rootfs_archive(docker, image_ref, image_identity) .await?; - self.publish_pulled_event(sandbox_id, local_image_ref, &archive_path) + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) .await; - 
Ok(image_identity) + Ok(image_identity.to_string()) } async fn build_cached_local_image_rootfs_archive( @@ -1420,33 +1480,18 @@ fn parse_registry_reference(image_ref: &str) -> Result { }) } -#[allow(clippy::result_large_err)] -fn parse_vm_local_image_ref(image_ref: &str) -> Result, Status> { - let Some(local_image_ref) = image_ref.strip_prefix(VM_LOCAL_IMAGE_REF_PREFIX) else { - return Ok(None); - }; - if local_image_ref.trim().is_empty() { - return Err(Status::failed_precondition( - "invalid vm local image reference: missing Docker image reference", - )); - } - Ok(Some(local_image_ref)) +fn is_openshell_local_build_image_ref(image_ref: &str) -> bool { + image_ref.starts_with("openshell/sandbox-from:") } -async fn local_docker_image_identity(docker: &Docker, image_ref: &str) -> Result { - let inspect = docker.inspect_image(image_ref).await.map_err(|err| { - Status::failed_precondition(format!( - "failed to inspect local Docker image '{image_ref}': {err}" - )) - })?; - inspect - .id - .filter(|id| !id.trim().is_empty()) - .ok_or_else(|| { - Status::failed_precondition(format!( - "local Docker image '{image_ref}' inspect response has no image ID" - )) - }) +fn is_docker_not_found_error(err: &BollardError) -> bool { + matches!( + err, + BollardError::DockerResponseServerError { + status_code: 404, + .. 
+ } + ) } async fn export_local_image_rootfs_to_path( @@ -2678,27 +2723,14 @@ mod tests { } #[test] - fn parse_vm_local_image_ref_classifies_prefixed_refs() { - assert_eq!( - parse_vm_local_image_ref("openshell-vm-local-image:openshell/sandbox-from:123") - .unwrap(), - Some("openshell/sandbox-from:123") - ); - assert_eq!(parse_vm_local_image_ref("ubuntu:24.04").unwrap(), None); - } - - #[test] - fn parse_vm_local_image_ref_rejects_empty_refs() { - for image_ref in ["openshell-vm-local-image:", "openshell-vm-local-image: "] { - let err = - parse_vm_local_image_ref(image_ref).expect_err("empty local image refs must fail"); - assert_eq!(err.code(), Code::FailedPrecondition); - assert!( - err.message().contains("missing Docker image reference"), - "unexpected error: {}", - err.message() - ); - } + fn openshell_local_build_image_ref_matches_cli_tags() { + assert!(is_openshell_local_build_image_ref( + "openshell/sandbox-from:123" + )); + assert!(!is_openshell_local_build_image_ref("ubuntu:24.04")); + assert!(!is_openshell_local_build_image_ref( + "ghcr.io/nvidia/openshell/base:latest" + )); } #[test] diff --git a/scripts/bin/openshell b/scripts/bin/openshell index 4a85332c6..0383d5b57 100755 --- a/scripts/bin/openshell +++ b/scripts/bin/openshell @@ -5,6 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" BINARY="$PROJECT_ROOT/target/debug/openshell" STATE_FILE="$PROJECT_ROOT/.cache/openshell-build.state" +CALLER_PWD="$PWD" # --------------------------------------------------------------------------- # Fingerprint-based rebuild check @@ -26,7 +27,10 @@ else current_head=$(git rev-parse HEAD 2>/dev/null || echo "unknown") # Collect dirty (modified, staged, untracked) files - mapfile -t changed_files < <( + changed_files=() + while IFS= read -r path; do + changed_files+=("$path") + done < <( { git diff --name-only 2>/dev/null git diff --name-only --cached 2>/dev/null @@ -118,7 +122,10 @@ if [[ "$needs_build" == "1" ]]; then cd "$PROJECT_ROOT" new_head=$(git rev-parse HEAD 2>/dev/null || echo "unknown") # Recompute fingerprint of remaining dirty files (build may not change them) - mapfile -t post_files < <( + post_files=() + while IFS= read -r path; do + post_files+=("$path") + done < <( { git diff --name-only 2>/dev/null git diff --name-only --cached 2>/dev/null @@ -165,4 +172,5 @@ fingerprint=${new_fingerprint} EOF fi +cd "$CALLER_PWD" exec "$BINARY" "$@" From e356d6078d84d0a2ce2c61e3127f86e587c5e3bc Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 29 Apr 2026 21:10:38 -0700 Subject: [PATCH 10/11] wip --- architecture/sandbox-custom-containers.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/architecture/sandbox-custom-containers.md b/architecture/sandbox-custom-containers.md index 13a568e15..3dad52f0e 100644 --- a/architecture/sandbox-custom-containers.md +++ b/architecture/sandbox-custom-containers.md @@ -19,8 +19,9 @@ The CLI classifies the value in this order: 1. **Existing file** whose name contains "Dockerfile" (case-insensitive) — treated as a Dockerfile to build. 2. **Existing directory** containing a `Dockerfile` — treated as a build context directory. -3. **Contains `/`, `:`, or `.`** — treated as a full container image reference. -4. 
**Otherwise** — treated as a community sandbox name, expanded to `{OPENSHELL_COMMUNITY_REGISTRY}/{name}:latest`. +3. **Missing explicit local path** (for example `./Dockerfile`, `../ctx`, or an absolute path) — rejected locally instead of sent to the gateway as an image pull. +4. **Contains `/`, `:`, or `.`** — treated as a full container image reference. +5. **Otherwise** — treated as a community sandbox name, expanded to `{OPENSHELL_COMMUNITY_REGISTRY}/{name}:latest`. The community registry prefix defaults to `ghcr.io/nvidia/openshell-community/sandboxes` and can be overridden with the `OPENSHELL_COMMUNITY_REGISTRY` environment variable. From 60212ea8d598f567d725f0abd5f224a6b9f2a201 Mon Sep 17 00:00:00 2001 From: Vincent Caux-Brisebois Date: Thu, 30 Apr 2026 23:32:12 +0000 Subject: [PATCH 11/11] Split GPU sandbox support into userspace (Dockerfile) and kernel modules (driver-side injection at sandbox creation time). The VM driver resolves pre-built .ko files from env var, state dir, build tree, or host paths and injects them into the per-sandbox rootfs alongside firmware and kmod symlinks. 
---
 .../scripts/openshell-vm-sandbox-init.sh    |  16 +
 crates/openshell-driver-vm/src/driver.rs    |  37 +-
 crates/openshell-driver-vm/src/rootfs.rs    | 561 +++++++++++++++++-
 crates/openshell-driver-vm/src/runtime.rs   |   2 +
 .../runtime/kernel/openshell.kconfig        |  22 +
 sandboxes/nvidia-gpu/Dockerfile             |  84 +++
 sandboxes/nvidia-gpu/README.md              |  98 +++
 sandboxes/nvidia-gpu/versions.env           |   6 +
 tasks/scripts/gateway-vm.sh                 |  19 +-
 tasks/scripts/vm/build-nvidia-modules.sh    | 176 ++++++
 tasks/scripts/vm/build-supervisor-bundle.sh |  29 +-
 tasks/vm.toml                               |   4 +
 12 files changed, 1037 insertions(+), 17 deletions(-)
 create mode 100644 sandboxes/nvidia-gpu/Dockerfile
 create mode 100644 sandboxes/nvidia-gpu/README.md
 create mode 100644 sandboxes/nvidia-gpu/versions.env
 create mode 100755 tasks/scripts/vm/build-nvidia-modules.sh

diff --git a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
index 063a75032..18c4c09a2 100644
--- a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
+++ b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
@@ -243,6 +243,17 @@ create_gpu_device_nodes_mknod() {
 setup_gpu() {
     ts "GPU_ENABLED=true — initializing GPU passthrough"
 
+    # Kernel modules are built for a specific guest kernel version.
+    # If the running kernel doesn't match, depmod/modprobe will silently fail.
+    local expected_kver="6.12.76"
+    local actual_kver
+    actual_kver="$(uname -r)"
+    if [ "${actual_kver}" != "${expected_kver}" ]; then
+        ts "WARNING: kernel version mismatch: expected ${expected_kver}, got ${actual_kver}"
+        ts "         GPU modules are installed under lib/modules/${expected_kver}/"
+        ts "         modprobe may fail to find them"
+    fi
+
     if ! 
command -v modprobe >/dev/null 2>&1; then ts "FATAL: modprobe not found; cannot load nvidia kernel modules" return 1 @@ -258,6 +269,11 @@ setup_gpu() { fi fi + ts "generating module dependency index" + if ! depmod -a "$(uname -r)" 2>/dev/null; then + ts "WARNING: depmod failed; modprobe may not find modules" + fi + ts "loading nvidia kernel modules" modprobe nvidia || { ts "FATAL: modprobe nvidia failed"; return 1; } modprobe nvidia_uvm 2>/dev/null || true diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index e21ce8849..f2a766101 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -5,8 +5,8 @@ use crate::gpu::{ GpuInventory, SubnetAllocator, allocate_vsock_cid, mac_from_sandbox_id, tap_device_name, }; use crate::rootfs::{ - create_rootfs_archive_from_dir, extract_rootfs_archive_to, - prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path, + create_rootfs_archive_from_dir, extract_rootfs_archive_to, inject_gpu_modules, + prepare_sandbox_rootfs_from_image_root, refresh_runtime_artifacts, sandbox_guest_init_path, }; use bollard::Docker; use bollard::errors::Error as BollardError; @@ -419,6 +419,28 @@ impl VmDriver { return Err(err); } }; + if is_gpu { + let rootfs_for_gpu = rootfs.clone(); + let driver_state_dir = self.config.state_dir.clone(); + if let Err(err) = tokio::task::spawn_blocking(move || { + inject_gpu_modules(&rootfs_for_gpu, &driver_state_dir) + }) + .await + .map_err(|e| Status::internal(format!("GPU module injection panicked: {e}")))? 
+ { + warn!( + sandbox_id = %sandbox.id, + error = %err, + "vm driver: GPU module injection failed" + ); + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(Status::failed_precondition(format!( + "GPU module injection failed: {err}" + ))); + } + info!(sandbox_id = %sandbox.id, "vm driver: GPU modules injected into rootfs"); + } + if let Some(tls_paths) = tls_paths.as_ref() && let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await { @@ -738,10 +760,13 @@ impl VmDriver { .await?; let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); let rootfs_dest = rootfs.to_path_buf(); - tokio::task::spawn_blocking(move || extract_rootfs_archive_to(&archive_path, &rootfs_dest)) - .await - .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))? - .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?; + tokio::task::spawn_blocking(move || { + extract_rootfs_archive_to(&archive_path, &rootfs_dest)?; + refresh_runtime_artifacts(&rootfs_dest) + }) + .await + .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))? + .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?; Ok(image_identity) } diff --git a/crates/openshell-driver-vm/src/rootfs.rs b/crates/openshell-driver-vm/src/rootfs.rs index 4eeb28917..b3e13ccbc 100644 --- a/crates/openshell-driver-vm/src/rootfs.rs +++ b/crates/openshell-driver-vm/src/rootfs.rs @@ -4,7 +4,7 @@ use std::fs; use std::fs::File; use std::io::{BufWriter, Cursor}; -use std::path::Path; +use std::path::{Path, PathBuf}; const SUPERVISOR: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/openshell-sandbox.zst")); const ROOTFS_VARIANT_MARKER: &str = ".openshell-rootfs-variant"; @@ -29,6 +29,32 @@ pub fn prepare_sandbox_rootfs_from_image_root( Ok(()) } +/// Re-inject the init script and supervisor binary into an already-prepared +/// rootfs. 
The image rootfs archive cache is keyed by image digest, so a +/// driver rebuild does not invalidate it. Calling this after extraction +/// ensures the guest always runs the init script and supervisor that match +/// the running driver binary. +pub fn refresh_runtime_artifacts(rootfs: &Path) -> Result<(), String> { + let init_path = rootfs.join("srv/openshell-vm-sandbox-init.sh"); + if let Some(parent) = init_path.parent() { + fs::create_dir_all(parent).map_err(|e| format!("create {}: {e}", parent.display()))?; + } + fs::write( + &init_path, + include_str!("../scripts/openshell-vm-sandbox-init.sh"), + ) + .map_err(|e| format!("write {}: {e}", init_path.display()))?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + fs::set_permissions(&init_path, fs::Permissions::from_mode(0o755)) + .map_err(|e| format!("chmod {}: {e}", init_path.display()))?; + } + + ensure_supervisor_binary(rootfs)?; + Ok(()) +} + pub fn extract_rootfs_archive_to(archive_path: &Path, dest: &Path) -> Result<(), String> { if dest.exists() { fs::remove_dir_all(dest) @@ -193,6 +219,350 @@ pub fn validate_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { Ok(()) } +/// Kernel version of the libkrunfw guest. Modules must be compiled against +/// this exact version; a mismatch causes `modprobe` failures at boot. +/// +/// Keep in sync with: +/// - `tasks/scripts/vm/build-nvidia-modules.sh` (KERNEL_TREE path) +/// - `openshell-vm-sandbox-init.sh` `setup_gpu()` expected version +const GUEST_KERNEL_VERSION: &str = "6.12.76"; + +/// Inject NVIDIA kernel modules, firmware, and `kmod` tooling into a prepared +/// sandbox rootfs. Called by the driver when a sandbox requests GPU support. +/// +/// Module source resolution order: +/// 1. `OPENSHELL_GPU_MODULES_DIR` environment variable +/// 2. `/gpu-modules/` (pre-provisioned by the operator) +/// +/// Firmware source resolution (first match wins): +/// 0. Rootfs already contains `.bin` files (e.g. 
from the image's `.run` +/// installer) — **skip injection entirely** to avoid version mismatch. +/// 1. `/../nvidia-firmware/` +/// 2. Host `/lib/firmware/nvidia/` +/// +/// Returns an error only if module injection is impossible (no source found +/// or a write fails). Missing firmware emits a warning and continues. +pub fn inject_gpu_modules(rootfs: &Path, state_dir: &Path) -> Result<(), String> { + let modules_dir = resolve_gpu_modules_dir(state_dir)?; + + let ko_files: Vec = fs::read_dir(&modules_dir) + .map_err(|e| format!("read GPU modules dir {}: {e}", modules_dir.display()))? + .filter_map(|entry| { + let entry = entry.ok()?; + let path = entry.path(); + if path.extension().is_some_and(|ext| ext == "ko") { + Some(path) + } else { + None + } + }) + .collect(); + + if ko_files.is_empty() { + return Err(format!( + "GPU modules dir {} contains no .ko files", + modules_dir.display() + )); + } + + let modules_dst = rootfs.join(format!( + "lib/modules/{GUEST_KERNEL_VERSION}/kernel/drivers/nvidia" + )); + fs::create_dir_all(&modules_dst) + .map_err(|e| format!("create {}: {e}", modules_dst.display()))?; + + for ko in &ko_files { + let dest = modules_dst.join(ko.file_name().unwrap()); + let bytes_copied = fs::copy(ko, &dest).map_err(|e| { + format!( + "copy {} -> {}: {e}", + ko.display(), + dest.display() + ) + })?; + tracing::info!( + module = %ko.file_name().unwrap().to_string_lossy(), + size_bytes = bytes_copied, + src = %ko.display(), + "injected GPU kernel module" + ); + } + + inject_gpu_firmware(rootfs, &modules_dir); + ensure_kmod_symlinks(rootfs); + warn_missing_gpu_userspace(rootfs); + + Ok(()) +} + +/// Check whether the rootfs contains essential GPU userspace binaries. +/// Emits actionable warnings when the sandbox image lacks nvidia-smi +/// or CUDA libraries — common when `--gpu` is used with a non-GPU base +/// image like `ubuntu:latest` instead of the GPU sandbox Dockerfile. 
+fn warn_missing_gpu_userspace(rootfs: &Path) {
+    let nvidia_smi_candidates = [
+        "usr/bin/nvidia-smi",
+        "usr/local/bin/nvidia-smi",
+        "bin/nvidia-smi",
+    ];
+    let has_nvidia_smi = nvidia_smi_candidates
+        .iter()
+        .any(|p| rootfs.join(p).exists());
+
+    if !has_nvidia_smi {
+        tracing::warn!(
+            "GPU sandbox image does not contain nvidia-smi. The sandbox will \
+             have GPU kernel modules but no NVIDIA userspace tools. Use a \
+             GPU-enabled image (e.g. --from ./sandboxes/nvidia-gpu/Dockerfile) \
+             or install the NVIDIA driver userspace in your image."
+        );
+    }
+}
+
+/// Locate the directory containing pre-built NVIDIA `.ko` files.
+///
+/// Resolution order:
+/// 1. `OPENSHELL_GPU_MODULES_DIR` env var (explicit override)
+/// 2. `<state_dir>/gpu-modules/` (operator pre-provisioned)
+/// 3. `<project_root>/target/libkrun-build/nvidia-modules/` (build tree,
+///    discovered relative to the driver executable)
+/// 4. Host `/lib/modules/<GUEST_KERNEL_VERSION>/kernel/drivers/nvidia/`
+fn resolve_gpu_modules_dir(state_dir: &Path) -> Result<PathBuf, String> {
+    if let Ok(dir) = std::env::var("OPENSHELL_GPU_MODULES_DIR") {
+        let p = PathBuf::from(&dir);
+        if p.is_dir() {
+            tracing::info!(path = %p.display(), "using GPU modules from OPENSHELL_GPU_MODULES_DIR");
+            return Ok(p);
+        }
+        return Err(format!(
+            "OPENSHELL_GPU_MODULES_DIR={dir} is not a directory"
+        ));
+    }
+
+    let provisioned = state_dir.join("gpu-modules");
+    if provisioned.is_dir() {
+        tracing::info!(path = %provisioned.display(), "using pre-provisioned GPU modules");
+        return Ok(provisioned);
+    }
+
+    // Auto-discover from the build tree. The driver binary lives at
+    // `target/{debug,release}/openshell-driver-vm`, so the project root
+    // is two levels up. The old GPU rootfs script places modules at
+    // `target/libkrun-build/nvidia-modules/`.
+    if let Some(build_tree_dir) = discover_build_tree_modules() {
+        return Ok(build_tree_dir);
+    }
+
+    // Check common host-installed module paths.
+    for candidate in [
+        format!("/lib/modules/{GUEST_KERNEL_VERSION}/kernel/drivers/nvidia"),
+        format!("/lib/modules/{GUEST_KERNEL_VERSION}/extra/nvidia"),
+    ] {
+        let p = PathBuf::from(&candidate);
+        if dir_has_ko_files(&p) {
+            tracing::info!(path = %p.display(), "using host-installed GPU modules");
+            return Ok(p);
+        }
+    }
+
+    Err(format!(
+        "No GPU kernel modules found. Searched: OPENSHELL_GPU_MODULES_DIR (unset), \
+         {}, build tree, host /lib/modules/{}. \
+         Build modules with `mise run vm:nvidia-modules` \
+         or set OPENSHELL_GPU_MODULES_DIR.",
+        provisioned.display(),
+        GUEST_KERNEL_VERSION,
+    ))
+}
+
+/// Walk up from the driver executable to find `target/libkrun-build/nvidia-modules/`.
+///
+/// This is a development convenience — production deployments should use
+/// `OPENSHELL_GPU_MODULES_DIR` or pre-provision `<state_dir>/gpu-modules/`.
+fn discover_build_tree_modules() -> Option<PathBuf> {
+    #[cfg(unix)]
+    if unsafe { libc::getuid() } == 0 {
+        tracing::debug!("build-tree GPU module discovery running as root; \
+            prefer OPENSHELL_GPU_MODULES_DIR in production");
+    }
+    let exe = std::env::current_exe().ok()?;
+    // exe is typically target/{debug,release}/openshell-driver-vm
+    let target_dir = exe.parent()?.parent()?;
+    let modules_dir = target_dir.join("libkrun-build/nvidia-modules");
+    if dir_has_ko_files(&modules_dir) {
+        tracing::info!(
+            path = %modules_dir.display(),
+            "auto-discovered GPU modules in build tree"
+        );
+        return Some(modules_dir);
+    }
+
+    // Also try CWD-relative (for `cargo run` or `mise run` from project root).
+ let cwd_candidate = PathBuf::from("target/libkrun-build/nvidia-modules"); + if dir_has_ko_files(&cwd_candidate) { + let abs = cwd_candidate.canonicalize().unwrap_or(cwd_candidate.clone()); + tracing::info!( + path = %abs.display(), + "auto-discovered GPU modules relative to CWD" + ); + return Some(abs); + } + + None +} + +fn dir_has_ko_files(dir: &Path) -> bool { + if !dir.is_dir() { + return false; + } + let Some(entries) = fs::read_dir(dir).ok() else { + return false; + }; + let mut has_uncompressed = false; + let mut has_compressed = false; + for entry in entries.flatten() { + let path = entry.path(); + match path.extension().and_then(|e| e.to_str()) { + Some("ko") => has_uncompressed = true, + Some("zst" | "xz") => { + if path.file_stem().and_then(|s| std::path::Path::new(s).extension()).is_some_and(|ext| ext == "ko") { + has_compressed = true; + } + } + _ => {} + } + } + if !has_uncompressed && has_compressed { + tracing::warn!( + path = %dir.display(), + "directory contains compressed .ko.zst/.ko.xz modules but only uncompressed .ko files are supported" + ); + } + has_uncompressed +} + +/// Copy NVIDIA GSP firmware into the rootfs. Non-fatal on failure. +/// +/// Skips injection if the rootfs already contains `.bin` firmware files +/// (e.g. the sandbox Docker image installed them via the NVIDIA `.run` +/// installer). Overwriting image-provided firmware with build-tree or +/// host firmware causes version mismatches when the host driver differs +/// from the image's driver version. +fn inject_gpu_firmware(rootfs: &Path, modules_dir: &Path) { + let fw_dst = rootfs.join("lib/firmware/nvidia"); + + if rootfs_has_firmware_bins(&fw_dst) { + tracing::info!( + path = %fw_dst.display(), + "rootfs already contains GPU firmware; skipping injection" + ); + return; + } + + // Try version-matched firmware next to the modules directory. 
+ let fw_parent = modules_dir + .parent() + .map(|p| p.join("nvidia-firmware")); + + if let Some(ref fw_dir) = fw_parent { + if fw_dir.is_dir() { + if let Err(e) = copy_dir_contents(fw_dir, &fw_dst) { + tracing::warn!(error = %e, "failed to copy version-matched firmware"); + } else { + tracing::info!(src = %fw_dir.display(), "injected GPU firmware (version-matched)"); + return; + } + } + } + + // Fallback: host firmware + for candidate in ["/lib/firmware/nvidia", "/usr/lib/firmware/nvidia"] { + let host_fw = Path::new(candidate); + if host_fw.is_dir() { + if let Err(e) = copy_dir_contents(host_fw, &fw_dst) { + tracing::warn!(error = %e, src = candidate, "failed to copy host firmware"); + } else { + tracing::info!(src = candidate, "injected GPU firmware from host"); + return; + } + } + } + + tracing::warn!( + "no NVIDIA GSP firmware found; GPU guests may fail to initialize. \ + Place firmware in {:?} or host /lib/firmware/nvidia/", + fw_parent.as_deref().unwrap_or(Path::new("(unknown)")) + ); +} + +/// Check whether a firmware directory (or any subdirectory) contains `.bin` files. +fn rootfs_has_firmware_bins(fw_dir: &Path) -> bool { + if !fw_dir.is_dir() { + return false; + } + let Ok(entries) = fs::read_dir(fw_dir) else { + return false; + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().is_some_and(|ext| ext == "bin") { + return true; + } + if path.is_dir() && rootfs_has_firmware_bins(&path) { + return true; + } + } + false +} + +/// Ensure `modprobe`, `insmod`, etc. symlinks exist. Many minimal container +/// images install `kmod` but lack the convenience symlinks in `/usr/sbin`. +fn ensure_kmod_symlinks(rootfs: &Path) { + let kmod_candidates = ["bin/kmod", "usr/bin/kmod", "sbin/kmod", "usr/sbin/kmod"]; + let kmod_exists = kmod_candidates + .iter() + .any(|p| rootfs.join(p).exists()); + + if !kmod_exists { + tracing::warn!("kmod not found in rootfs; modprobe will fail. 
\ + Ensure the sandbox image installs the 'kmod' package."); + return; + } + + let sbin = rootfs.join("usr/sbin"); + let _ = fs::create_dir_all(&sbin); + for tool in ["modprobe", "insmod", "rmmod", "lsmod", "depmod"] { + let link = sbin.join(tool); + if !link.exists() { + #[cfg(unix)] + { + let _ = std::os::unix::fs::symlink("../../bin/kmod", &link) + .or_else(|_| std::os::unix::fs::symlink("/usr/bin/kmod", &link)); + } + } + } +} + +/// Recursively copy all files from `src` to `dst`, preserving directory structure. +fn copy_dir_contents(src: &Path, dst: &Path) -> Result<(), String> { + fs::create_dir_all(dst).map_err(|e| format!("create {}: {e}", dst.display()))?; + + for entry in fs::read_dir(src).map_err(|e| format!("read {}: {e}", src.display()))? { + let entry = entry.map_err(|e| format!("read entry in {}: {e}", src.display()))?; + let src_path = entry.path(); + let dst_path = dst.join(entry.file_name()); + + if src_path.is_dir() { + copy_dir_contents(&src_path, &dst_path)?; + } else { + fs::copy(&src_path, &dst_path).map_err(|e| { + format!("copy {} -> {}: {e}", src_path.display(), dst_path.display()) + })?; + } + } + Ok(()) +} + fn ensure_sandbox_guest_user(rootfs: &Path) -> Result<(), String> { const SANDBOX_UID: u32 = 10001; const SANDBOX_GID: u32 = 10001; @@ -415,6 +785,195 @@ mod tests { let _ = fs::remove_dir_all(&dir); } + #[test] + fn refresh_runtime_artifacts_overwrites_stale_init_script() { + let dir = unique_temp_dir(); + let rootfs = dir.join("rootfs"); + + fs::create_dir_all(rootfs.join("srv")).expect("create srv"); + fs::create_dir_all(rootfs.join("opt/openshell/bin")).expect("create openshell bin"); + fs::write( + rootfs.join("srv/openshell-vm-sandbox-init.sh"), + b"#!/bin/bash\n# stale placeholder", + ) + .expect("write stale init"); + fs::write( + rootfs.join("opt/openshell/bin/openshell-sandbox"), + b"old-supervisor", + ) + .expect("write stale supervisor"); + + refresh_runtime_artifacts(&rootfs).expect("refresh runtime artifacts"); + + let 
init_content = + fs::read_to_string(rootfs.join("srv/openshell-vm-sandbox-init.sh")).expect("read init"); + assert!( + init_content.contains("setup_gpu"), + "refreshed init script should contain GPU setup logic" + ); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn inject_gpu_modules_copies_ko_files() { + let dir = unique_temp_dir(); + let modules_dir = dir.join("modules"); + let rootfs = dir.join("rootfs"); + + fs::create_dir_all(&modules_dir).expect("create modules dir"); + fs::create_dir_all(&rootfs).expect("create rootfs dir"); + fs::write(modules_dir.join("nvidia.ko"), b"\x7fELF-fake-module-1").expect("write nvidia.ko"); + fs::write(modules_dir.join("nvidia-uvm.ko"), b"\x7fELF-fake-module-2") + .expect("write nvidia-uvm.ko"); + + unsafe { std::env::set_var("OPENSHELL_GPU_MODULES_DIR", &modules_dir) }; + let result = inject_gpu_modules(&rootfs, Path::new("/dummy/state")); + unsafe { std::env::remove_var("OPENSHELL_GPU_MODULES_DIR") }; + + result.expect("inject_gpu_modules should succeed"); + + let dest = rootfs.join("lib/modules/6.12.76/kernel/drivers/nvidia"); + assert!(dest.join("nvidia.ko").is_file()); + assert!(dest.join("nvidia-uvm.ko").is_file()); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn inject_gpu_modules_fails_with_no_ko_files() { + let dir = unique_temp_dir(); + let modules_dir = dir.join("modules"); + + fs::create_dir_all(&modules_dir).expect("create modules dir"); + fs::write(modules_dir.join("readme.txt"), b"not a kernel module").expect("write txt"); + + unsafe { std::env::set_var("OPENSHELL_GPU_MODULES_DIR", &modules_dir) }; + let result = inject_gpu_modules(Path::new("/dummy/rootfs"), Path::new("/dummy/state")); + unsafe { std::env::remove_var("OPENSHELL_GPU_MODULES_DIR") }; + + let err = result.expect_err("should fail with no .ko files"); + assert!( + err.contains("no .ko files"), + "error should mention 'no .ko files', got: {err}" + ); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn 
inject_gpu_modules_fails_with_missing_dir() { + let dir = unique_temp_dir(); + let missing = dir.join("does-not-exist"); + + unsafe { std::env::set_var("OPENSHELL_GPU_MODULES_DIR", &missing) }; + let result = inject_gpu_modules(Path::new("/dummy/rootfs"), Path::new("/dummy/state")); + unsafe { std::env::remove_var("OPENSHELL_GPU_MODULES_DIR") }; + + let err = result.expect_err("should fail with missing directory"); + assert!( + err.contains("not a directory"), + "error should mention 'not a directory', got: {err}" + ); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn inject_gpu_firmware_skips_when_rootfs_has_bins() { + let dir = unique_temp_dir(); + let rootfs = dir.join("rootfs"); + let modules_dir = dir.join("modules"); + let fw_dir = rootfs.join("lib/firmware/nvidia"); + + fs::create_dir_all(&fw_dir).expect("create firmware dir"); + fs::create_dir_all(&modules_dir).expect("create modules dir"); + fs::write(fw_dir.join("gsp.bin"), b"original-firmware-content").expect("write gsp.bin"); + + inject_gpu_firmware(&rootfs, &modules_dir); + + let content = fs::read(fw_dir.join("gsp.bin")).expect("read gsp.bin after injection"); + assert_eq!( + content, + b"original-firmware-content", + "firmware should not be overwritten when rootfs already has .bin files" + ); + + let _ = fs::remove_dir_all(&dir); + } + + #[cfg(unix)] + #[test] + fn ensure_kmod_symlinks_creates_links() { + let dir = unique_temp_dir(); + let rootfs = dir.join("rootfs"); + + fs::create_dir_all(rootfs.join("bin")).expect("create bin"); + fs::write(rootfs.join("bin/kmod"), b"kmod-stub").expect("write kmod"); + + ensure_kmod_symlinks(&rootfs); + + assert!( + rootfs.join("usr/sbin/modprobe").exists(), + "modprobe symlink should exist" + ); + assert!( + rootfs.join("usr/sbin/insmod").exists(), + "insmod symlink should exist" + ); + assert!( + rootfs.join("usr/sbin/depmod").exists(), + "depmod symlink should exist" + ); + assert!( + fs::symlink_metadata(rootfs.join("usr/sbin/modprobe")) + .unwrap() + 
.file_type() + .is_symlink(), + "modprobe should be a symlink" + ); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn ensure_kmod_symlinks_warns_without_kmod() { + let dir = unique_temp_dir(); + let rootfs = dir.join("rootfs"); + + fs::create_dir_all(&rootfs).expect("create rootfs"); + + ensure_kmod_symlinks(&rootfs); + + assert!( + !rootfs.join("usr/sbin/modprobe").exists(), + "modprobe should not exist when kmod is missing" + ); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn rootfs_has_firmware_bins_detects_nested() { + let dir1 = unique_temp_dir(); + fs::create_dir_all(dir1.join("subdir")).expect("create subdir"); + fs::write(dir1.join("subdir/file.bin"), b"firmware").expect("write .bin"); + assert!( + rootfs_has_firmware_bins(&dir1), + "should detect .bin in nested subdir" + ); + let _ = fs::remove_dir_all(&dir1); + + let dir2 = unique_temp_dir(); + fs::create_dir_all(dir2.join("subdir")).expect("create subdir"); + fs::write(dir2.join("subdir/file.txt"), b"not firmware").expect("write .txt"); + assert!( + !rootfs_has_firmware_bins(&dir2), + "should not detect .txt as firmware" + ); + let _ = fs::remove_dir_all(&dir2); + } + fn unique_temp_dir() -> PathBuf { static COUNTER: AtomicU64 = AtomicU64::new(0); let nanos = SystemTime::now() diff --git a/crates/openshell-driver-vm/src/runtime.rs b/crates/openshell-driver-vm/src/runtime.rs index a7c9afcea..e87e06a1e 100644 --- a/crates/openshell-driver-vm/src/runtime.rs +++ b/crates/openshell-driver-vm/src/runtime.rs @@ -331,6 +331,8 @@ fn build_kernel_cmdline(config: &VmLaunchConfig) -> String { if config.gpu_bdf.is_some() { parts.push("GPU_ENABLED=true".to_string()); parts.push("firmware_class.path=/lib/firmware".to_string()); + parts.push("modprobe.blacklist=nouveau".to_string()); + parts.push("nouveau.modeset=0".to_string()); } parts.join(" ") diff --git a/crates/openshell-vm/runtime/kernel/openshell.kconfig b/crates/openshell-vm/runtime/kernel/openshell.kconfig index b5f0330af..072bf6e16 100644 
--- a/crates/openshell-vm/runtime/kernel/openshell.kconfig +++ b/crates/openshell-vm/runtime/kernel/openshell.kconfig @@ -123,6 +123,28 @@ CONFIG_MEMCG=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y +# ── PCI/PCIe (required for GPU passthrough via QEMU vfio-pci) ──────────── +# The libkrunfw base config disables CONFIG_PCI. GPU sandboxes using the +# QEMU backend pass the GPU through as a PCIe device on a q35 machine. +# Without PCI core support the guest kernel cannot see any PCI bus, so the +# nvidia driver loads but finds zero devices. +CONFIG_PCI=y +CONFIG_PCI_MSI=y +CONFIG_PCIEPORTBUS=y + +# ── Loadable kernel modules (required for GPU passthrough) ────────────── +# The libkrunfw base config disables CONFIG_MODULES. GPU sandboxes need it +# to load nvidia.ko, nvidia-uvm.ko, and nvidia-modeset.ko at boot via +# modprobe. Without this, the guest kernel rejects all module loads. +# +# SECURITY NOTE: This enables module loading for ALL VMs (including +# non-GPU), expanding the guest kernel attack surface. The sandbox +# supervisor's seccomp profile must block init_module/finit_module +# syscalls for the sandbox user to prevent arbitrary module loading. +# Tracked: consider per-purpose kernel builds (GPU vs non-GPU). +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y + # ── Security features required by the sandbox runtime ─────────────────── CONFIG_SECURITY_LANDLOCK=y CONFIG_SECCOMP_FILTER=y diff --git a/sandboxes/nvidia-gpu/Dockerfile b/sandboxes/nvidia-gpu/Dockerfile new file mode 100644 index 000000000..372d58b58 --- /dev/null +++ b/sandboxes/nvidia-gpu/Dockerfile @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# GPU-enabled sandbox image for OpenShell VM driver. 
+# +# Provides userspace GPU tooling (nvidia-smi, NVML, CUDA driver libs, kmod) +# on top of a minimal Ubuntu base with the full NVIDIA driver userspace +# installed via the official .run installer (no kernel modules -- those are +# injected at rootfs preparation time by the VM driver). +# +# Usage: +# openshell sandbox create --gpu --from ./sandboxes/nvidia-gpu/Dockerfile +# openshell sandbox create --gpu --from nvidia-gpu # once published +# +# Build-time args: +# CUDA_VERSION - CUDA toolkit version (default: 12.8.1) +# UBUNTU_VERSION - Ubuntu release (default: 22.04) +# NVIDIA_DRIVER_VERSION - Must match the kernel modules built by +# `mise run vm:nvidia-modules` (default: 580.159.03) + +ARG CUDA_VERSION=12.8.1 +ARG UBUNTU_VERSION=22.04 + +FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION} + +# Must match NVIDIA_DRIVER_VERSION in sandboxes/nvidia-gpu/versions.env +# and NVIDIA_OPEN_VERSION in tasks/scripts/vm/build-nvidia-modules.sh +ARG NVIDIA_DRIVER_VERSION=580.159.03 + +# ── System packages required by the sandbox init script ────────────── +RUN apt-get update && apt-get install -y --no-install-recommends \ + bash \ + busybox-static \ + ca-certificates \ + curl \ + iproute2 \ + iptables \ + kmod \ + pciutils \ + && rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /usr/share/udhcpc && ln -sf /bin/busybox /sbin/udhcpc + +# ── NVIDIA driver userspace ────────────────────────────────────────── +# The nvidia/cuda base image does NOT include the driver (nvidia-smi, +# libcuda.so, libnvidia-ml.so). It relies on the NVIDIA Container +# Runtime to mount them from the host. In a VM there is no container +# runtime, so we install the driver userspace via the .run installer +# with --no-kernel-module (kernel modules are injected separately). +# TODO(gpu): Pin SHA-256 checksum for reproducible builds. 
Compute with: +# curl -fsSL | sha256sum +RUN curl -fsSL \ + "https://us.download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_DRIVER_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}.run" \ + -o /tmp/nvidia.run \ + && chmod +x /tmp/nvidia.run \ + && /tmp/nvidia.run \ + --silent \ + --no-kernel-module \ + --no-drm \ + --no-x-check \ + --no-systemd \ + --no-nvidia-modprobe \ + --no-distro-scripts \ + && rm -f /tmp/nvidia.run + +# Ensure library paths are indexed for dlopen. +RUN mkdir -p /etc/ld.so.conf.d \ + && echo "/usr/local/cuda/lib64" > /etc/ld.so.conf.d/cuda.conf \ + && echo "/usr/lib/x86_64-linux-gnu" >> /etc/ld.so.conf.d/cuda.conf \ + && ldconfig 2>/dev/null || true + +# ── Kernel modules ─────────────────────────────────────────────────── +# NVIDIA kernel modules (.ko) must match the guest VM kernel (libkrunfw). +# They are NOT in this image -- the VM driver injects them at rootfs +# preparation time via `inject_gpu_modules`. +# +# GSP firmware (.bin) IS provided by the .run installer above. The VM +# driver detects its presence and skips firmware injection, avoiding +# version mismatches when the host driver differs from this image's. +RUN mkdir -p /lib/modules + +LABEL org.opencontainers.image.title="OpenShell GPU Sandbox" \ + org.opencontainers.image.description="GPU-enabled sandbox for OpenShell VM driver with CUDA support" \ + io.openshell.sandbox.gpu="true" diff --git a/sandboxes/nvidia-gpu/README.md b/sandboxes/nvidia-gpu/README.md new file mode 100644 index 000000000..7826b59cb --- /dev/null +++ b/sandboxes/nvidia-gpu/README.md @@ -0,0 +1,98 @@ + + + +# GPU Sandbox Image + +GPU-enabled sandbox image for the OpenShell VM driver. Provides NVIDIA +userspace tooling (nvidia-smi, NVML, CUDA driver libraries) on top of a +minimal Ubuntu base. Kernel modules are injected separately by the VM +driver at sandbox creation time. 
+ +## Architecture + +The GPU sandbox splits responsibility between the container image and the +VM driver: + +| Layer | Source | Contents | +|-------|--------|----------| +| **Userspace** | This Dockerfile | nvidia-smi, libcuda.so, libnvidia-ml.so, kmod, iproute2 | +| **Kernel modules** | VM driver injection | nvidia.ko, nvidia_uvm.ko, nvidia_modeset.ko (built for guest kernel 6.12.76) | +| **GSP firmware** | `.run` installer in image OR host fallback | gsp_ga10x.bin, gsp_tu10x.bin | + +The kernel modules must be compiled against the exact guest kernel version +used by libkrunfw. The VM driver injects them into each sandbox's rootfs +at creation time via `inject_gpu_modules()`. + +## Prerequisites + +- Linux x86_64 host with an NVIDIA GPU +- IOMMU enabled (for VFIO GPU passthrough) +- Docker (for building the sandbox image) +- Guest kernel built with `CONFIG_MODULES=y` (`mise run vm:setup`) + +## Quick Start + +```shell +# 1. One-time: build the VM runtime (includes guest kernel with module support) +mise run vm:setup + +# 2. Build NVIDIA kernel modules for the guest kernel +mise run vm:nvidia-modules + +# 3. Build the GPU sandbox image +docker build -t nvidia-gpu:latest ./sandboxes/nvidia-gpu/ + +# 4. Start the gateway with GPU support +sudo mise run gateway:vm -- --gpu + +# 5. Create a GPU sandbox +openshell sandbox create --gpu --from nvidia-gpu:latest +``` + +## Version Coupling + +The NVIDIA driver version must match across three components: + +| Component | Variable | Default | +|-----------|----------|---------| +| Dockerfile (userspace) | `NVIDIA_DRIVER_VERSION` | `580.159.03` | +| Module build script | `NVIDIA_OPEN_VERSION` | `580.159.03` | +| Shared reference | `sandboxes/nvidia-gpu/versions.env` | `580.159.03` | + +A mismatch causes `modprobe` "version magic" errors or nvidia-smi ABI +failures at sandbox boot time. 
+ +## Customization + +### Changing the CUDA version + +```shell +docker build \ + --build-arg CUDA_VERSION=12.6.0 \ + --build-arg UBUNTU_VERSION=22.04 \ + -t my-gpu-sandbox:latest \ + ./sandboxes/nvidia-gpu/ +``` + +### Changing the NVIDIA driver version + +Update all three locations: +1. `sandboxes/nvidia-gpu/versions.env` +2. `sandboxes/nvidia-gpu/Dockerfile` ARG `NVIDIA_DRIVER_VERSION` +3. Rebuild kernel modules: `NVIDIA_OPEN_VERSION= mise run vm:nvidia-modules` + +### Adding packages + +Add packages to the `apt-get install` line in the Dockerfile. The image +must retain `bash`, `kmod`, `iproute2`, and `busybox-static` — the VM +driver validates these at rootfs preparation time. + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| "No GPU kernel modules found" | Modules not built | `mise run vm:nvidia-modules` | +| "kmod not found in rootfs" | Image missing kmod package | Add `kmod` to Dockerfile `apt-get install` | +| `modprobe nvidia` fails | Kernel version mismatch | Rebuild modules after `mise run vm:setup` | +| nvidia-smi "driver/library mismatch" | Userspace/module version mismatch | Ensure Dockerfile and module versions match | +| "kernel version mismatch: expected X, got Y" | Guest kernel was rebuilt | Rebuild modules: `mise run vm:nvidia-modules` | diff --git a/sandboxes/nvidia-gpu/versions.env b/sandboxes/nvidia-gpu/versions.env new file mode 100644 index 000000000..d2f086da8 --- /dev/null +++ b/sandboxes/nvidia-gpu/versions.env @@ -0,0 +1,6 @@ +# Shared NVIDIA driver/module version for GPU sandbox images. +# Referenced by: +# - sandboxes/nvidia-gpu/Dockerfile (ARG NVIDIA_DRIVER_VERSION) +# - tasks/scripts/vm/build-nvidia-modules.sh (NVIDIA_OPEN_VERSION) +# These MUST match for kernel modules and userspace to be compatible. 
+NVIDIA_DRIVER_VERSION=580.159.03 diff --git a/tasks/scripts/gateway-vm.sh b/tasks/scripts/gateway-vm.sh index cdeaa7931..2efbf699e 100755 --- a/tasks/scripts/gateway-vm.sh +++ b/tasks/scripts/gateway-vm.sh @@ -32,6 +32,8 @@ set -euo pipefail +MISE="${__MISE_EXE:-$(command -v mise 2>/dev/null || echo mise)}" + ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" PORT="${OPENSHELL_SERVER_PORT:-18081}" GATEWAY_NAME="${OPENSHELL_VM_GATEWAY_NAME:-vm-dev}" @@ -219,13 +221,13 @@ if [ ! -d "${COMPRESSED_DIR}" ] \ || ! find "${COMPRESSED_DIR}" -maxdepth 1 -name 'libkrun*.zst' | grep -q . \ || [ ! -f "${COMPRESSED_DIR}/gvproxy.zst" ]; then echo "==> Preparing embedded VM runtime (mise run vm:setup)" - mise run vm:setup + "$MISE" run vm:setup fi if [ ! -f "${COMPRESSED_DIR}/openshell-sandbox.zst" ]; then check_supervisor_cross_toolchain echo "==> Building bundled VM supervisor (mise run vm:supervisor)" - mise run vm:supervisor + "$MISE" run vm:supervisor fi export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${COMPRESSED_DIR}" @@ -236,8 +238,17 @@ if [[ -n "${CARGO_BUILD_JOBS:-}" ]]; then fi echo "==> Building openshell-gateway and openshell-driver-vm" -cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ - -p openshell-server -p openshell-driver-vm +CARGO_CMD=(cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ + -p openshell-server -p openshell-driver-vm) +if [ "$(id -u)" = "0" ] && [ -n "${SUDO_USER:-}" ]; then + chown -R "${SUDO_USER}" "${ROOT}/target" 2>/dev/null || true + sudo -u "${SUDO_USER}" env \ + "PATH=${PATH}" \ + "OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}" \ + "${CARGO_CMD[@]}" +else + "${CARGO_CMD[@]}" +fi if [ "$(uname -s)" = "Darwin" ]; then echo "==> Codesigning openshell-driver-vm (Hypervisor entitlement)" diff --git a/tasks/scripts/vm/build-nvidia-modules.sh b/tasks/scripts/vm/build-nvidia-modules.sh new file mode 100755 index 000000000..96287fc8c --- /dev/null +++ 
b/tasks/scripts/vm/build-nvidia-modules.sh @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build NVIDIA kernel modules against the libkrunfw guest kernel. +# +# Uses the NVIDIA DKMS source already installed on the host (from the +# nvidia-dkms-* or nvidia-kernel-source-* package) and compiles it +# against the guest kernel tree produced by build-libkrun.sh. +# +# Prerequisites: +# - NVIDIA kernel source in /usr/src/nvidia-*/ +# - Guest kernel built with CONFIG_MODULES=y (mise run vm:setup) +# +# Output: +# target/libkrun-build/nvidia-modules/*.ko +# target/libkrun-build/nvidia-firmware//*.bin (if available) +# +# Usage: +# ./build-nvidia-modules.sh + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +BUILD_DIR="${ROOT}/target/libkrun-build" +OUTPUT_DIR="${BUILD_DIR}/nvidia-modules" + +# Guest kernel version — keep in sync with GUEST_KERNEL_VERSION in +# crates/openshell-driver-vm/src/rootfs.rs and the init script. +KERNEL_TREE="${BUILD_DIR}/libkrunfw/linux-6.12.76" +if [ ! -d "${KERNEL_TREE}" ]; then + echo "ERROR: Guest kernel tree not found at ${KERNEL_TREE}" >&2 + echo " Run: mise run vm:setup" >&2 + exit 1 +fi + +if ! grep -q 'CONFIG_MODULES=y' "${KERNEL_TREE}/.config" 2>/dev/null; then + echo "ERROR: Guest kernel was built without CONFIG_MODULES=y" >&2 + echo " Ensure openshell.kconfig includes CONFIG_MODULES=y and rebuild:" >&2 + echo " mise run vm:setup" >&2 + exit 1 +fi + +if [ ! -f "${KERNEL_TREE}/Module.symvers" ]; then + echo "ERROR: Module.symvers not found — the kernel needs to be rebuilt" >&2 + echo " with CONFIG_MODULES=y so the build produces Module.symvers." >&2 + echo " Run: mise run vm:setup" >&2 + exit 1 +fi + +# Detect the host NVIDIA driver version to pick a compatible module source. 
+HOST_DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-}" +if [ -z "${HOST_DRIVER_VERSION}" ]; then + HOST_DRIVER_VERSION="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null || true)" +fi +if [ -z "${HOST_DRIVER_VERSION}" ]; then + HOST_DRIVER_VERSION="$(modinfo -F version /lib/modules/$(uname -r)/updates/dkms/nvidia.ko 2>/dev/null || true)" +fi + +# Use the open-gpu-kernel-modules release matching the host driver major +# version. The open modules support newer kernels better than the +# proprietary DKMS source shipped in /usr/src/. +# Must match NVIDIA_DRIVER_VERSION in sandboxes/nvidia-gpu/versions.env +# and sandboxes/nvidia-gpu/Dockerfile ARG NVIDIA_DRIVER_VERSION +NVIDIA_OPEN_VERSION="${NVIDIA_OPEN_VERSION:-580.159.03}" +NVIDIA_SRC_DIR="${BUILD_DIR}/open-gpu-kernel-modules-${NVIDIA_OPEN_VERSION}" + +if [ ! -d "${NVIDIA_SRC_DIR}/kernel-open" ]; then + echo "==> Downloading NVIDIA open kernel modules ${NVIDIA_OPEN_VERSION}" + TARBALL="${BUILD_DIR}/nvidia-open-${NVIDIA_OPEN_VERSION}.tar.gz" + if [ ! -f "${TARBALL}" ]; then + curl -fSL \ + "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/${NVIDIA_OPEN_VERSION}.tar.gz" \ + -o "${TARBALL}" + # TODO(gpu): Add SHA-256 verification for supply chain integrity. + # echo " ${TARBALL}" | sha256sum -c - + fi + echo " Extracting..." + tar -xzf "${TARBALL}" -C "${BUILD_DIR}" + echo " Source: ${NVIDIA_SRC_DIR}" +fi + +NVIDIA_SRC="${NVIDIA_SRC_DIR}" + +# Patch API incompatibilities with newer kernels. +# __flush_tlb() was removed in kernel 6.12; use __flush_tlb_all() instead. 
+NV_PAT="${NVIDIA_SRC}/kernel-open/nvidia/nv-pat.c" +if [ -f "${NV_PAT}" ] && grep -q '__flush_tlb()' "${NV_PAT}"; then + echo "==> Patching nv-pat.c (__flush_tlb -> __flush_tlb_all)" + sed -i 's/__flush_tlb()/__flush_tlb_all()/g' "${NV_PAT}" +fi + +echo "==> Building NVIDIA ${NVIDIA_OPEN_VERSION} open kernel modules for guest kernel 6.12.76" +echo " NVIDIA source: ${NVIDIA_SRC}" +echo " Kernel tree: ${KERNEL_TREE}" +echo " Output: ${OUTPUT_DIR}" +if [ -n "${HOST_DRIVER_VERSION}" ]; then + echo " Host driver: ${HOST_DRIVER_VERSION}" +fi +echo "" + +mkdir -p "${OUTPUT_DIR}" + +NPROC="$(nproc 2>/dev/null || echo 4)" +IGNORE_CC_MISMATCH=1 make -C "${NVIDIA_SRC}" \ + SYSSRC="${KERNEL_TREE}" \ + SYSOUT="${KERNEL_TREE}" \ + -j"${NPROC}" \ + modules 2>&1 | tail -30 + +echo "" +echo "==> Collecting .ko files" + +# Open modules build into kernel-open// +for subdir in nvidia nvidia-uvm nvidia-modeset nvidia-drm nvidia-peermem; do + for search in "${NVIDIA_SRC}/kernel-open/${subdir}" "${NVIDIA_SRC}/${subdir}"; do + ko_file="${search}/${subdir//-/_}.ko" + if [ -f "${ko_file}" ]; then + cp "${ko_file}" "${OUTPUT_DIR}/" + echo " $(basename "${ko_file}") ($(du -h "${ko_file}" | cut -f1))" + break + fi + done +done + +# Also check for flat layouts. +for ko in "${NVIDIA_SRC}"/*.ko "${NVIDIA_SRC}"/kernel-open/*.ko; do + [ -f "${ko}" ] || continue + base="$(basename "${ko}")" + [ -f "${OUTPUT_DIR}/${base}" ] && continue + cp "${ko}" "${OUTPUT_DIR}/" + echo " ${base} ($(du -h "${ko}" | cut -f1))" +done + +KO_COUNT=$(find "${OUTPUT_DIR}" -name '*.ko' | wc -l) +if [ "${KO_COUNT}" -eq 0 ]; then + echo "ERROR: No .ko files produced. Check build output above." >&2 + exit 1 +fi + +echo "" +echo "==> Collecting firmware" + +# GSP firmware is included in the open-gpu-kernel-modules source tree. 
+FW_SRC="${NVIDIA_SRC}/src/nvidia/firmware" +FW_OUTPUT="${BUILD_DIR}/nvidia-firmware/${NVIDIA_OPEN_VERSION}" +if [ -d "${FW_SRC}" ] && ls "${FW_SRC}"/*.bin >/dev/null 2>&1; then + mkdir -p "${FW_OUTPUT}" + cp "${FW_SRC}"/*.bin "${FW_OUTPUT}/" + FW_COUNT=$(find "${FW_OUTPUT}" -name '*.bin' 2>/dev/null | wc -l) + echo " Copied ${FW_COUNT} firmware files from source tree" +else + # Fall back to host firmware. + HOST_FW="" + for candidate in "/lib/firmware/nvidia/${HOST_DRIVER_VERSION}" /lib/firmware/nvidia; do + if [ -d "${candidate}" ] && ls "${candidate}"/*.bin >/dev/null 2>&1; then + HOST_FW="${candidate}" + break + fi + done + if [ -n "${HOST_FW}" ]; then + mkdir -p "${FW_OUTPUT}" + cp -r "${HOST_FW}"/* "${FW_OUTPUT}/" 2>/dev/null || true + FW_COUNT=$(find "${FW_OUTPUT}" -name '*.bin' 2>/dev/null | wc -l) + echo " Copied ${FW_COUNT} firmware files from host ${HOST_FW}" + else + echo " WARNING: No firmware found. GPU guests may fail without GSP firmware." + fi +fi + +echo "" +echo "==> Done! ${KO_COUNT} kernel modules built for guest kernel 6.12.76." +echo " The VM driver will auto-discover them at:" +echo " ${OUTPUT_DIR}" +echo "" +echo " Next: mise run gateway:vm -- --gpu" diff --git a/tasks/scripts/vm/build-supervisor-bundle.sh b/tasks/scripts/vm/build-supervisor-bundle.sh index 90f5b517d..a823201d7 100755 --- a/tasks/scripts/vm/build-supervisor-bundle.sh +++ b/tasks/scripts/vm/build-supervisor-bundle.sh @@ -133,14 +133,31 @@ run_supervisor_build() { cargo_prefix=(env -u RUSTC_WRAPPER) fi - if command -v cargo-zigbuild >/dev/null 2>&1; then - "${cargo_prefix[@]}" cargo zigbuild --release -p openshell-sandbox --target "${RUST_TARGET}" \ - --manifest-path "${ROOT}/Cargo.toml" - else + # When running under sudo, de-escalate the build to the original user. + # The target/ dir is owned by that user and root may lack write access + # (e.g. NFS root_squash). Only the final gateway execution needs root. + # Pass PATH explicitly so cargo/rustc/sccache remain reachable. 
+ # Also reclaim any root-owned artifacts left by prior sudo builds. + if [ "$(id -u)" = "0" ] && [ -n "${SUDO_USER:-}" ]; then + if [ -d "${ROOT}/target" ]; then + chown -R "${SUDO_USER}" "${ROOT}/target" 2>/dev/null || true + fi + cargo_prefix=(sudo -u "${SUDO_USER}" env "PATH=${PATH}" "${cargo_prefix[@]}") + fi + + local host_arch + host_arch="$(uname -m)" + local cargo_build_cmd="build" + local cargo_bin="cargo" + + if [ "${host_arch}" != "${GUEST_ARCH}" ] && command -v cargo-zigbuild >/dev/null 2>&1; then + cargo_build_cmd="zigbuild" + elif [ "${host_arch}" != "${GUEST_ARCH}" ]; then echo " cargo-zigbuild not found, falling back to cargo build..." - "${cargo_prefix[@]}" cargo build --release -p openshell-sandbox --target "${RUST_TARGET}" \ - --manifest-path "${ROOT}/Cargo.toml" fi + + "${cargo_prefix[@]}" ${cargo_bin} ${cargo_build_cmd} --release -p openshell-sandbox \ + --target "${RUST_TARGET}" --manifest-path "${ROOT}/Cargo.toml" } print_build_failure() { diff --git a/tasks/vm.toml b/tasks/vm.toml index e9eb22561..d288dd2e3 100644 --- a/tasks/vm.toml +++ b/tasks/vm.toml @@ -45,6 +45,10 @@ run = "tasks/scripts/vm/build-supervisor-bundle.sh" description = "Build the VM rootfs tarball (use -- --base for lightweight)" run = "tasks/scripts/vm/build-rootfs-tarball.sh" +["vm:nvidia-modules"] +description = "Build NVIDIA kernel modules for the guest VM kernel" +run = "tasks/scripts/vm/build-nvidia-modules.sh" + ["vm:clean"] description = "Remove all VM cached artifacts (runtime, rootfs, builds)" run = "tasks/scripts/vm/vm-clean.sh"