diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index 5563a67eb..3ccce68db 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -636,12 +636,20 @@ jobs: checkout-ref: ${{ github.sha }} secrets: inherit + build-rpm: + name: Build RPM Packages + needs: [compute-versions] + uses: ./.github/workflows/rpm-package.yml + with: + checkout-ref: ${{ github.sha }} + secrets: inherit + # --------------------------------------------------------------------------- # Create / update the dev GitHub Release with CLI binaries and wheels # --------------------------------------------------------------------------- release-dev: name: Release Dev - needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, build-deb] + needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, build-deb, build-rpm] runs-on: build-amd64 timeout-minutes: 10 outputs: @@ -684,6 +692,13 @@ jobs: path: release/ merge-multiple: true + - name: Download RPM package artifacts + uses: actions/download-artifact@v4 + with: + pattern: rpm-linux-* + path: release/ + merge-multiple: true + - name: Capture wheel filenames id: wheel_filenames run: | @@ -701,6 +716,7 @@ jobs: openshell-aarch64-unknown-linux-musl.tar.gz \ openshell-aarch64-apple-darwin.tar.gz \ openshell_*.deb \ + openshell-*.rpm \ *.whl > openshell-checksums-sha256.txt cat openshell-checksums-sha256.txt sha256sum \ @@ -713,7 +729,7 @@ jobs: openshell-sandbox-aarch64-unknown-linux-gnu.tar.gz > openshell-sandbox-checksums-sha256.txt cat openshell-sandbox-checksums-sha256.txt - - name: Prune stale wheel and deb assets from dev release + - name: Prune stale wheel, deb, and rpm assets from dev release uses: 
actions/github-script@v7 env: WHEEL_VERSION: ${{ needs.compute-versions.outputs.python_version }} @@ -745,13 +761,17 @@ jobs: core.info(` ${String(a.id).padStart(12)} ${a.name}`); } - // Delete stale wheels - let kept = 0, deleted = 0, debDeleted = 0; + // Delete stale wheels, debs, and rpms + let kept = 0, deleted = 0, debDeleted = 0, rpmDeleted = 0; for (const asset of assets) { if (asset.name.endsWith('.deb')) { core.info(`Deleting stale deb package: ${asset.name} (id=${asset.id})`); await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); debDeleted++; + } else if (asset.name.endsWith('.rpm')) { + core.info(`Deleting stale rpm package: ${asset.name} (id=${asset.id})`); + await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); + rpmDeleted++; } else if (asset.name.endsWith('.whl') && asset.name.startsWith(currentPrefix)) { core.info(`Keeping current wheel: ${asset.name}`); kept++; @@ -761,7 +781,7 @@ jobs: deleted++; } } - core.info(`Summary: kept_wheels=${kept}, deleted_wheels=${deleted}, deleted_debs=${debDeleted}`); + core.info(`Summary: kept_wheels=${kept}, deleted_wheels=${deleted}, deleted_debs=${debDeleted}, deleted_rpms=${rpmDeleted}`); - name: Move dev tag run: | @@ -793,6 +813,7 @@ jobs: release/openshell-aarch64-unknown-linux-musl.tar.gz release/openshell-aarch64-apple-darwin.tar.gz release/openshell_*.deb + release/openshell-*.rpm release/openshell-gateway-x86_64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-apple-darwin.tar.gz diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml index 7df792cba..79d7cbf61 100644 --- a/.github/workflows/release-tag.yml +++ b/.github/workflows/release-tag.yml @@ -663,12 +663,20 @@ jobs: checkout-ref: ${{ inputs.tag || github.ref }} secrets: inherit + build-rpm: + name: Build RPM Packages + needs: [compute-versions] + uses: ./.github/workflows/rpm-package.yml + with: + 
checkout-ref: ${{ inputs.tag || github.ref }} + secrets: inherit + # --------------------------------------------------------------------------- # Create a tagged GitHub Release with CLI binaries and wheels # --------------------------------------------------------------------------- release: name: Release - needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, tag-ghcr-release, build-deb] + needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, tag-ghcr-release, build-deb, build-rpm] runs-on: build-amd64 timeout-minutes: 10 outputs: @@ -713,6 +721,13 @@ jobs: path: release/ merge-multiple: true + - name: Download RPM package artifacts + uses: actions/download-artifact@v4 + with: + pattern: rpm-linux-* + path: release/ + merge-multiple: true + - name: Capture wheel filenames id: wheel_filenames run: | @@ -730,6 +745,7 @@ jobs: openshell-aarch64-unknown-linux-musl.tar.gz \ openshell-aarch64-apple-darwin.tar.gz \ openshell_*.deb \ + openshell-*.rpm \ *.whl > openshell-checksums-sha256.txt cat openshell-checksums-sha256.txt sha256sum \ @@ -763,6 +779,7 @@ jobs: release/openshell-aarch64-unknown-linux-musl.tar.gz release/openshell-aarch64-apple-darwin.tar.gz release/openshell_*.deb + release/openshell-*.rpm release/openshell-gateway-x86_64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-apple-darwin.tar.gz diff --git a/.github/workflows/rpm-package.yml b/.github/workflows/rpm-package.yml new file mode 100644 index 000000000..b80882d75 --- /dev/null +++ b/.github/workflows/rpm-package.yml @@ -0,0 +1,71 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +name: RPM Package + +on: + workflow_call: + inputs: + checkout-ref: + required: true + type: string + +permissions: + contents: read + +defaults: + run: + shell: bash + +jobs: + build-rpm-linux: + name: Build RPM Package (Linux ${{ matrix.arch }}) + strategy: + matrix: + include: + - arch: x86_64 + runner: build-amd64 + - arch: aarch64 + runner: build-arm64 + runs-on: ${{ matrix.runner }} + timeout-minutes: 60 + container: + image: fedora:latest + steps: + - name: Install build dependencies + run: | + dnf install -y \ + packit rpm-build \ + rust cargo gcc gcc-c++ make cmake pkg-config \ + clang-devel z3-devel systemd-rpm-macros \ + pandoc python3-devel git-core \ + cargo-rpm-macros + + - uses: actions/checkout@v6 + with: + ref: ${{ inputs.checkout-ref }} + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Build RPMs via Packit + run: packit build locally + + - name: Collect RPM artifacts + run: | + set -euo pipefail + mkdir -p artifacts + find ~/rpmbuild/RPMS/ -name '*.rpm' -exec cp {} artifacts/ \; + echo "=== Built RPMs ===" + ls -lah artifacts/ + + - name: Upload RPM artifacts + uses: actions/upload-artifact@v4 + with: + name: rpm-linux-${{ matrix.arch }} + path: artifacts/*.rpm + retention-days: 5 diff --git a/.gitignore b/.gitignore index 915c90d9d..1b37bfd49 100644 --- a/.gitignore +++ b/.gitignore @@ -206,5 +206,11 @@ rfc.md .worktrees .z3-trace +# RPM build artifacts +*.src.rpm +*.tar.gz +*.tar.xz +*.tar.bz2 + # Markdown/mermaid lint tooling deps scripts/lint-mermaid/node_modules/ diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc index 3f340fbc2..4c7f68e5a 100644 --- a/.markdownlint-cli2.jsonc +++ b/.markdownlint-cli2.jsonc @@ -10,7 +10,11 @@ ".opencode/**", ".github/**", "THIRD-PARTY-NOTICES/**", - "CLAUDE.md" + "CLAUDE.md", + // Man page sources use pandoc 
markdown with multiple H1 sections + // (NAME, SYNOPSIS, DESCRIPTION, etc.) which is standard for man + // pages but violates MD025. + "deploy/man/**" ], "config": { "default": true, diff --git a/.packit.yaml b/.packit.yaml new file mode 100644 index 000000000..d9de04b2f --- /dev/null +++ b/.packit.yaml @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Packit configuration for OpenShell RPM builds via Fedora COPR. +# See https://packit.dev/docs/configuration for full reference. + +upstream_tag_template: "v{version}" +upstream_package_name: openshell +downstream_package_name: openshell +specfile_path: openshell.spec + +# Packages needed in the SRPM build environment to create vendor tarball +srpm_build_deps: + - rust + - cargo + - git-core + +actions: + get-current-version: + # Derive version from the latest upstream tag on the current branch. + - 'bash -c "git describe --tags --match ''v*'' --abbrev=0 HEAD | sed ''s/^v//''"' + + create-archive: + # Step 1: Create source tarball from git working tree. + # Uses git ls-files + tar instead of git archive so the tarball + # reflects any patching that Packit may have done (e.g. version bumps). + - 'bash -c "VERSION=${PACKIT_PROJECT_VERSION} && TMPDIR=$(mktemp -d) && DIR=openshell-${VERSION} && mkdir -p ${TMPDIR}/${DIR} && git ls-files -z | xargs -0 tar cf - | tar xf - -C ${TMPDIR}/${DIR}/ && tar -czf openshell-${VERSION}.tar.gz -C ${TMPDIR} ${DIR} && rm -rf ${TMPDIR}"' + # Step 2: Create vendored Cargo dependencies tarball for offline RPM build. + - 'bash -c "VERSION=${PACKIT_PROJECT_VERSION} && cargo vendor --quiet && tar -cJf openshell-${VERSION}-vendor.tar.xz vendor/ && rm -rf vendor/"' + # Step 3: Return BOTH archive names. Packit maps each line to Source0, Source1, etc. 
+ - 'bash -c "echo openshell-${PACKIT_PROJECT_VERSION}.tar.gz && echo openshell-${PACKIT_PROJECT_VERSION}-vendor.tar.xz"' + + fix-spec-file: + # Update Source0 to the generated tarball name + - 'bash -c "sed -i \"s|^Source0:.*|Source0: openshell-${PACKIT_PROJECT_VERSION}.tar.gz|\" openshell.spec"' + # Update Source1 to the generated vendor tarball name + - 'bash -c "sed -i \"s|^Source1:.*|Source1: openshell-${PACKIT_PROJECT_VERSION}-vendor.tar.xz|\" openshell.spec"' + # Update Version + - 'bash -c "sed -i -r \"s/^Version:(\\s*)\\S+/Version:\\1${PACKIT_RPMSPEC_VERSION}/\" openshell.spec"' + # Update Release + - 'bash -c "sed -i -r \"s/^Release:(\\s*)\\S+/Release:\\1${PACKIT_RPMSPEC_RELEASE}%{?dist}/\" openshell.spec"' + +jobs: + # Build on every pull request targeting main for CI validation + - job: copr_build + trigger: pull_request + branch: main + identifier: main-pr + targets: + - fedora-all + - epel-10 + + # Build into maxamillion/openshell on every commit to main + # for continuous development and testing builds. + - job: copr_build + trigger: commit + branch: main + owner: "maxamillion" + project: "openshell" + identifier: main-commit + targets: + - fedora-all + - epel-10 + preserve_project: true + list_on_homepage: true + + # Build on GitHub releases for publishable RPMs. + # See: https://packit.dev/docs/configuration/upstream/copr_build#using-a-custom-copr-project + - job: copr_build + trigger: release + owner: "maxamillion" + project: "openshell" + targets: + - fedora-all + - epel-10 + preserve_project: true + list_on_homepage: true diff --git a/architecture/podman-driver.md b/architecture/podman-driver.md index 155937a77..67b86861d 100644 --- a/architecture/podman-driver.md +++ b/architecture/podman-driver.md @@ -100,7 +100,19 @@ sequenceDiagram C->>C: entrypoint: /opt/openshell/bin/openshell-sandbox ``` -The supervisor image is a `FROM scratch` image containing only the prebuilt `openshell-sandbox` binary. 
It is built by the `supervisor-output` target in `deploy/docker/Dockerfile.images`. The `image_volumes` field in the container spec mounts this image's filesystem at `/opt/openshell/bin` with `rw: false`, making it a read-only overlay that the sandbox cannot tamper with. +The supervisor image is a `FROM scratch` image containing only the prebuilt `openshell-sandbox` binary. It is built by the `supervisor` target in `deploy/docker/Dockerfile.images`. The `image_volumes` field in the container spec mounts this image's filesystem at `/opt/openshell/bin` with `rw: false`, making it a read-only overlay that the sandbox cannot tamper with. + +## TLS + +When all three of the Podman driver's TLS paths are set (`guest_tls_ca`, `guest_tls_cert`, `guest_tls_key` in `PodmanComputeConfig`), the driver: + +1. Switches the auto-detected endpoint scheme from `http://` to `https://` +2. Bind-mounts the client cert files (read-only) into the container at `/etc/openshell/tls/client/` +3. Sets `OPENSHELL_TLS_CA`, `OPENSHELL_TLS_CERT`, `OPENSHELL_TLS_KEY` env vars pointing to the container-side paths + +The supervisor reads these env vars and uses them to establish an mTLS connection back to the gateway. + +The RPM packaging auto-generates a self-signed PKI on first start via `init-pki.sh`. Client certs are placed in the CLI auto-discovery directory (`~/.config/openshell/gateways/openshell/mtls/`) so the CLI connects with mTLS without manual configuration. See `deploy/rpm/CONFIGURATION.md` for the full RPM configuration reference and `deploy/rpm/QUICKSTART.md` for the quick start guide. 
## Network Model diff --git a/architecture/podman-rootless-networking.md b/architecture/podman-rootless-networking.md index d13b9ca84..54ff882f5 100644 --- a/architecture/podman-rootless-networking.md +++ b/architecture/podman-rootless-networking.md @@ -306,37 +306,52 @@ Supervisor proxy (10.200.0.1:3128 in container netns) ### Supervisor gRPC Callback to Gateway +The Podman driver auto-detects the callback endpoint scheme based on +whether TLS client certificates are configured. When the RPM's +auto-generated PKI is in place, the endpoint is +`https://host.containers.internal:8080` and the supervisor connects +with mTLS. Without TLS configuration, it falls back to +`http://host.containers.internal:8080`. + ```text Supervisor (container netns, 10.89.x.2) | - 1. gRPC connect to http://host.containers.internal:8080 + 1. mTLS connect to https://host.containers.internal:8080 (resolves to 169.254.1.2:8080 via /etc/hosts) + Client cert bind-mounted from host at /etc/openshell/tls/client/ | 2. Routed through container default gateway (bridge) | 3. Pasta translates: L2 frame -> host L4 socket - (pasta host-gateway mapping: 169.254.1.2 -> 127.0.0.1) | - 4. Host TCP socket connects to 127.0.0.1:8080 + 4. Host TCP socket connects to gateway (0.0.0.0:8080) | -Gateway (host, port 8080) +Gateway (host, 0.0.0.0:8080, mTLS enabled) | - 5. ConnectSupervisor bidirectional stream established - 6. Heartbeats every N seconds (gateway sends interval in SessionAccepted, default 15s) - 7. Reconnects with exponential backoff (1s initial, 30s max) on failure - 8. Same gRPC channel reused for RelayStream calls (no new TLS handshake) + 5. TLS handshake: server presents server cert, client presents client cert + 6. ConnectSupervisor bidirectional stream established + 7. Heartbeats every N seconds (gateway sends interval in SessionAccepted, default 15s) + 8. Reconnects with exponential backoff (1s initial, 30s max) on failure + 9. 
Same gRPC channel reused for RelayStream calls (no new TLS handshake) ``` +The gateway binds to `0.0.0.0` by default in the RPM packaging. mTLS +prevents unauthenticated access even though the gateway is reachable +from the network. Client certificates are auto-generated by +`init-pki.sh` on first start and bind-mounted into sandbox containers +by the Podman driver. See `deploy/rpm/CONFIGURATION.md` for the full +configuration reference. + ## Differences from the Kubernetes Driver | Aspect | Kubernetes | Podman (rootless pasta) | |--------|-----------|----------------------| | Container/Pod IP | Routable cluster-wide | Non-routable (10.89.x.x inside user namespace) | -| Network reachability | Pod IPs reachable from gateway | Bridge not routable from host; requires pasta port forwarding or `host.containers.internal` | -| Sandbox -> Gateway | Direct TCP to K8s service IP | `host.containers.internal` (169.254.1.2 via pasta) | +| Network reachability | Pod IPs reachable from gateway | Bridge not routable from host; requires `host.containers.internal` | +| Sandbox -> Gateway | Direct TCP to K8s service IP | `host.containers.internal` via bridge + pasta | | SSH transport | Reverse gRPC relay (`ConnectSupervisor` + `RelayStream`) -- same mechanism as Podman | Reverse gRPC relay (`ConnectSupervisor` + `RelayStream`) | | Port publishing | Not needed (routable IPs) | Ephemeral host port via pasta port forwarding | -| TLS | mTLS via K8s secrets | Disabled by default (loopback-only, `--disable-tls`) | +| TLS | mTLS via K8s secrets | mTLS via auto-generated PKI (RPM default) or `--disable-tls` | | DNS | Kubernetes CoreDNS | Podman bridge DNS (aardvark-dns, `dns_enabled: true`) | | Network policy | K8s NetworkPolicy (ingress restricted to gateway) | iptables inside inner sandbox netns | | Supervisor delivery | hostPath volume from k3s node | OCI image volume mount (FROM scratch image) | diff --git a/crates/openshell-bootstrap/src/metadata.rs 
b/crates/openshell-bootstrap/src/metadata.rs index beadcbeac..2bf1352d4 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -65,6 +65,18 @@ pub struct GatewayMetadata { /// When set, tokens will include these scopes for fine-grained access control. #[serde(default, skip_serializing_if = "Option::is_none")] pub oidc_scopes: Option, + + /// Whether the CLI manages this gateway's full lifecycle (deploy, + /// stop, destroy). + /// + /// - `Some(true)` — deployed via `gateway start`; destroy/stop operate on + /// the underlying container or VM. + /// - `Some(false)` — registered via `gateway add`; destroy/stop only remove + /// the local registration metadata. + /// - `None` — legacy metadata written before this field existed; the CLI + /// falls back to the previous heuristic (`is_remote`). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub client_lifecycle_managed: Option, } impl GatewayMetadata { @@ -153,6 +165,7 @@ pub fn create_gateway_metadata_with_host( remote_host, resolved_host, auth_mode: disable_tls.then(|| "plaintext".to_string()), + client_lifecycle_managed: Some(true), ..Default::default() } } @@ -502,6 +515,53 @@ mod tests { assert!(parsed.resolved_host.is_none()); } + #[test] + fn metadata_deserialize_without_client_lifecycle_managed_field() { + // Legacy metadata files won't have the client_lifecycle_managed field. + // Ensure backwards compatibility: defaults to None. 
+ let json = r#"{ + "name": "test", + "gateway_endpoint": "https://127.0.0.1:8080", + "is_remote": false, + "gateway_port": 8080 + }"#; + let parsed: GatewayMetadata = serde_json::from_str(json).unwrap(); + assert_eq!(parsed.client_lifecycle_managed, None); + } + + #[test] + fn metadata_roundtrip_with_client_lifecycle_managed_field() { + let meta = GatewayMetadata { + name: "test".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + gateway_port: 8080, + client_lifecycle_managed: Some(false), + ..Default::default() + }; + let json = serde_json::to_string(&meta).unwrap(); + assert!(json.contains(r#""client_lifecycle_managed":false"#)); + let parsed: GatewayMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed.client_lifecycle_managed, Some(false)); + } + + #[test] + fn metadata_omits_client_lifecycle_managed_when_none() { + let meta = GatewayMetadata { + name: "test".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + gateway_port: 8080, + ..Default::default() + }; + let json = serde_json::to_string(&meta).unwrap(); + assert!(!json.contains("client_lifecycle_managed")); + } + + #[test] + fn create_gateway_metadata_sets_client_lifecycle_managed_true() { + let meta = create_gateway_metadata("test", None, 8080); + assert_eq!(meta.client_lifecycle_managed, Some(true)); + } + #[test] fn local_gateway_metadata_with_gateway_host_override() { let meta = create_gateway_metadata_with_host( diff --git a/crates/openshell-cli/src/completers.rs b/crates/openshell-cli/src/completers.rs index d5d9a0a88..d8ba3ff93 100644 --- a/crates/openshell-cli/src/completers.rs +++ b/crates/openshell-cli/src/completers.rs @@ -178,6 +178,7 @@ mod tests { gateway_endpoint: "https://alpha.example.com".to_string(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() }, ) diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 
ccad7a099..59f41e739 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -2976,6 +2976,7 @@ mod tests { gateway_endpoint: endpoint.to_string(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index eaadf7908..ab9a9db65 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -865,6 +865,31 @@ fn is_loopback_gateway_endpoint(endpoint: &str) -> bool { } } +/// Check whether mTLS client certs exist on disk for the gateway that +/// would serve this endpoint. +/// +/// Loopback endpoints (`localhost`, `127.0.0.1`, `::1`) resolve to the +/// `"openshell"` gateway name, matching the convention used by +/// `init-pki.sh` and the TLS cert resolver in `tls.rs`. +fn mtls_certs_exist_for_endpoint(name: &str, endpoint: &str) -> bool { + let cert_name = if is_loopback_gateway_endpoint(endpoint) { + "openshell" + } else { + name + }; + openshell_core::paths::xdg_config_dir() + .is_ok_and(|d| { + let mtls = d + .join("openshell") + .join("gateways") + .join(cert_name) + .join("mtls"); + mtls.join("ca.crt").is_file() + && mtls.join("tls.crt").is_file() + && mtls.join("tls.key").is_file() + }) +} + fn plaintext_gateway_is_remote(endpoint: &str, remote: Option<&str>, local: bool) -> bool { if local { return false; @@ -895,6 +920,7 @@ fn plaintext_gateway_metadata( remote_host, resolved_host, auth_mode: Some("plaintext".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() } } @@ -1024,9 +1050,14 @@ pub async fn gateway_add( } // Derive a gateway name from the hostname when none is provided. + // Loopback endpoints use the canonical "openshell" name, matching the + // convention in init-pki.sh, default_tls_dir, and bootstrap. 
let derived_name; let name = if let Some(n) = name { n + } else if is_loopback_gateway_endpoint(&endpoint) { + derived_name = "openshell".to_string(); + &derived_name } else { // Parse out just the host portion of the URL. derived_name = url::Url::parse(&endpoint) @@ -1134,6 +1165,24 @@ pub async fn gateway_add( } if endpoint.starts_with("http://") { + // Warn if mTLS certs exist for this gateway — the user likely + // meant to use https:// instead of http://. + let has_mtls_certs = mtls_certs_exist_for_endpoint(name, &endpoint); + + if has_mtls_certs { + let https_endpoint = endpoint.replacen("http://", "https://", 1); + let suggestion = if is_loopback_gateway_endpoint(&endpoint) { + format!("openshell gateway add --local {https_endpoint}") + } else { + format!("openshell gateway add {https_endpoint}") + }; + eprintln!( + "{} mTLS certificates found for gateway '{name}'. Did you mean to use https?", + "⚠".yellow().bold(), + ); + eprintln!(" Try: {suggestion}"); + } + let metadata = plaintext_gateway_metadata(name, &endpoint, remote, local); let gateway_type = gateway_type_label(&metadata); let gateway_auth = gateway_auth_label(&metadata); @@ -1141,6 +1190,21 @@ pub async fn gateway_add( store_gateway_metadata(name, &metadata)?; save_active_gateway(name)?; + // Verify the gateway is reachable. + let tls = TlsOptions::default(); + match http_health_check(&endpoint, &tls).await { + Ok(Some(status)) if status.is_success() => {} + _ => { + eprintln!( + "{} Gateway is not reachable at {endpoint}", + "⚠".yellow().bold(), + ); + if !has_mtls_certs { + eprintln!(" Verify the gateway is running and the endpoint is correct."); + } + } + } + eprintln!( "{} Gateway '{}' added and set as active", "✓".green().bold(), @@ -1167,10 +1231,20 @@ pub async fn gateway_add( // is not registered. Pass the endpoint port so the container can be // identified by its host port binding when multiple gateways run on // the same Docker host. 
- let endpoint_port = url::Url::parse(&endpoint).ok().and_then(|u| u.port()); - eprintln!("• Extracting TLS certificates from gateway container..."); - openshell_bootstrap::extract_and_store_pki(name, remote_opts.as_ref(), endpoint_port) - .await?; + // + // Skip extraction when client certs are already on disk (e.g., + // RPM/systemd deployments where init-pki.sh pre-provisions them + // before the gateway starts). + let certs_on_disk = mtls_certs_exist_for_endpoint(name, &endpoint); + + if certs_on_disk { + eprintln!("• TLS certificates already present, skipping extraction"); + } else { + let endpoint_port = url::Url::parse(&endpoint).ok().and_then(|u| u.port()); + eprintln!("• Extracting TLS certificates from gateway container..."); + openshell_bootstrap::extract_and_store_pki(name, remote_opts.as_ref(), endpoint_port) + .await?; + } let (remote_host, resolved_host) = remote.map_or((None, None), |dest| { let ssh_host = extract_host_from_ssh_destination(dest); @@ -1186,12 +1260,25 @@ pub async fn gateway_add( remote_host, resolved_host, auth_mode: Some("mtls".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() }; store_gateway_metadata(name, &metadata)?; save_active_gateway(name)?; + // Verify the gateway is reachable over mTLS. + let tls = TlsOptions::default().with_gateway_name(name); + match http_health_check(&endpoint, &tls).await { + Ok(Some(status)) if status.is_success() => {} + _ => { + eprintln!( + "{} Gateway is not reachable at {endpoint}. 
Verify the gateway is running.", + "⚠".yellow().bold(), + ); + } + } + eprintln!( "{} Gateway '{}' added and set as active", "✓".green().bold(), @@ -1203,7 +1290,15 @@ pub async fn gateway_add( "Type:".dimmed(), if local { "local" } else { "remote" }, ); - eprintln!("{} TLS certificates extracted", "✓".green().bold()); + eprintln!( + "{} TLS certificates {}", + "✓".green().bold(), + if certs_on_disk { + "already present" + } else { + "extracted" + } + ); } else { // Cloud (edge-authenticated) gateway. let metadata = GatewayMetadata { @@ -1211,6 +1306,7 @@ pub async fn gateway_add( gateway_endpoint: endpoint.clone(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() }; @@ -1736,10 +1832,20 @@ fn resolve_gateway_control_target_from( } match metadata { - Some(metadata) if metadata.is_remote => metadata.remote_host.map_or( + // Not client-managed (`gateway add`) — the gateway lifecycle is + // managed externally (e.g. systemd, Podman, bare-metal); only + // remove the local registration metadata on destroy/stop. + Some(ref m) if m.client_lifecycle_managed == Some(false) => { + GatewayControlTarget::ExternalRegistration + } + // Remote gateway with SSH destination — managed remote container. + Some(ref m) if m.is_remote => m.remote_host.clone().map_or( GatewayControlTarget::ExternalRegistration, GatewayControlTarget::Remote, ), + // Client-managed (`gateway start`) or legacy metadata (no + // `client_lifecycle_managed` field) — treat as a + // locally-managed container. 
_ => GatewayControlTarget::Local, } } @@ -5790,6 +5896,7 @@ mod tests { gateway_endpoint: endpoint.to_string(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() } } @@ -6093,6 +6200,49 @@ mod tests { } } + #[test] + fn resolve_gateway_control_target_non_managed_loopback_is_external() { + // A gateway registered via `gateway add http://localhost:8080` should + // be classified as an external registration, not a local container. + let metadata = GatewayMetadata { + name: "localhost".to_string(), + gateway_endpoint: "http://localhost:8080".to_string(), + auth_mode: Some("plaintext".to_string()), + client_lifecycle_managed: Some(false), + ..Default::default() + }; + let target = resolve_gateway_control_target_from(Some(metadata), None); + assert!(matches!(target, GatewayControlTarget::ExternalRegistration)); + } + + #[test] + fn resolve_gateway_control_target_managed_gateway_is_local() { + // A gateway deployed via `gateway start` should be classified as local. + let metadata = GatewayMetadata { + name: "openshell".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + gateway_port: 8080, + client_lifecycle_managed: Some(true), + ..Default::default() + }; + let target = resolve_gateway_control_target_from(Some(metadata), None); + assert!(matches!(target, GatewayControlTarget::Local)); + } + + #[test] + fn resolve_gateway_control_target_legacy_metadata_defaults_to_local() { + // Legacy metadata without the `client_lifecycle_managed` field + // should preserve the existing behavior: non-remote → Local. 
+ let metadata = GatewayMetadata { + name: "openshell".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + gateway_port: 8080, + ..Default::default() + }; + let target = resolve_gateway_control_target_from(Some(metadata), None); + assert!(matches!(target, GatewayControlTarget::Local)); + } + #[test] fn gateway_select_uses_explicit_name_without_prompting() { let tmpdir = tempfile::tempdir().expect("create tmpdir"); @@ -6263,11 +6413,14 @@ mod tests { .expect("register plaintext gateway"); }); - let metadata = load_gateway_metadata("127.0.0.1").expect("load stored gateway"); + // Loopback endpoints derive the canonical "openshell" gateway + // name, matching init-pki.sh and default_tls_dir conventions. + let metadata = load_gateway_metadata("openshell").expect("load stored gateway"); assert_eq!(metadata.auth_mode.as_deref(), Some("plaintext")); assert!(!metadata.is_remote); + assert_eq!(metadata.client_lifecycle_managed, Some(false)); assert_eq!(metadata.gateway_endpoint, "http://127.0.0.1:8080"); - assert_eq!(load_active_gateway().as_deref(), Some("127.0.0.1")); + assert_eq!(load_active_gateway().as_deref(), Some("openshell")); }); } @@ -6295,6 +6448,7 @@ mod tests { let metadata = load_gateway_metadata("dev-http").expect("load stored gateway"); assert_eq!(metadata.auth_mode.as_deref(), Some("plaintext")); assert!(!metadata.is_remote); + assert_eq!(metadata.client_lifecycle_managed, Some(false)); assert_eq!(metadata.gateway_endpoint, "http://gateway.example.com:8080"); assert_eq!(load_active_gateway().as_deref(), Some("dev-http")); }); diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index 8b8df5b89..5a806c343 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -61,8 +61,9 @@ const HOST_OPENSHELL_INTERNAL_HOSTS_ENTRY: &str = "host.openshell.internal:127.0 /// explicit `--docker-supervisor-bin` override or local build is available. 
const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor"; -/// Path to the supervisor binary inside the `openshell/supervisor` image. -const SUPERVISOR_IMAGE_BINARY_PATH: &str = "/usr/local/bin/openshell-sandbox"; +/// Path to the supervisor binary inside the `openshell/supervisor` image +/// (a `FROM scratch` image containing only the binary). +const SUPERVISOR_IMAGE_BINARY_PATH: &str = "/openshell-sandbox"; /// Return the default `ghcr.io/nvidia/openshell/supervisor:` reference /// used when no supervisor binary override is provided. @@ -1431,8 +1432,8 @@ fn linux_supervisor_candidates(daemon_arch: &str) -> Vec { } /// Pull the supervisor image (if not already local), extract -/// `/usr/local/bin/openshell-sandbox` to a host cache keyed by the image's -/// content digest, and return the cache path. +/// `/openshell-sandbox` to a host cache keyed by the image's content +/// digest, and return the cache path. /// /// The extraction is atomic: the binary is written to a sibling temp file /// inside the digest-keyed directory and renamed into place, so concurrent @@ -1528,7 +1529,7 @@ async fn extract_supervisor_binary_bytes(docker: &Docker, image: &str) -> CoreRe ), ContainerCreateBody { image: Some(image.to_string()), - entrypoint: Some(vec!["/bin/true".to_string()]), + entrypoint: Some(vec!["/openshell-sandbox".to_string()]), cmd: Some(Vec::new()), ..Default::default() }, diff --git a/crates/openshell-driver-podman/src/client.rs b/crates/openshell-driver-podman/src/client.rs index 12ea0901f..69bfd69c0 100644 --- a/crates/openshell-driver-podman/src/client.rs +++ b/crates/openshell-driver-podman/src/client.rs @@ -234,6 +234,18 @@ pub struct HostInfo { pub cgroup_version: String, #[serde(default)] pub network_backend: String, + #[serde(default)] + pub security: SecurityInfo, +} + +/// Security-related fields from the Podman system info response. 
+/// +/// Podman returns `host.security.rootless: true` when the daemon is +/// running without root privileges (rootless mode). +#[derive(Debug, Clone, Default, serde::Deserialize)] +pub struct SecurityInfo { + #[serde(default)] + pub rootless: bool, } // ── Client ─────────────────────────────────────────────────────────────── diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index 1586002ab..9798cc79f 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -102,9 +102,26 @@ pub struct PodmanComputeConfig { /// Mounted read-only into sandbox containers at /opt/openshell/bin /// using Podman's `type=image` mount. pub supervisor_image: String, + /// Host path to the CA certificate for sandbox mTLS. + /// + /// When all three TLS paths (`guest_tls_ca`, `guest_tls_cert`, + /// `guest_tls_key`) are set, the driver bind-mounts them into sandbox + /// containers and switches the auto-detected endpoint from `http://` + /// to `https://`. + pub guest_tls_ca: Option, + /// Host path to the client certificate for sandbox mTLS. + pub guest_tls_cert: Option, + /// Host path to the client private key for sandbox mTLS. + pub guest_tls_key: Option, } impl PodmanComputeConfig { + /// Returns `true` when all three TLS paths are configured. + #[must_use] + pub fn tls_enabled(&self) -> bool { + self.guest_tls_ca.is_some() && self.guest_tls_cert.is_some() && self.guest_tls_key.is_some() + } + /// Resolve the default socket path from the environment. 
/// /// - **macOS**: `$HOME/.local/share/containers/podman/machine/podman.sock` @@ -148,6 +165,9 @@ impl Default for PodmanComputeConfig { ssh_handshake_skew_secs: DEFAULT_SSH_HANDSHAKE_SKEW_SECS, stop_timeout_secs: DEFAULT_STOP_TIMEOUT_SECS, supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), + guest_tls_ca: None, + guest_tls_cert: None, + guest_tls_key: None, } } } @@ -168,6 +188,9 @@ impl std::fmt::Debug for PodmanComputeConfig { .field("ssh_handshake_skew_secs", &self.ssh_handshake_skew_secs) .field("stop_timeout_secs", &self.stop_timeout_secs) .field("supervisor_image", &self.supervisor_image) + .field("guest_tls_ca", &self.guest_tls_ca) + .field("guest_tls_cert", &self.guest_tls_cert) + .field("guest_tls_key", &self.guest_tls_key) .finish() } } diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index cc7bbc519..2600a53e8 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -10,6 +10,25 @@ use serde::Serialize; use serde_json::Value; use std::collections::BTreeMap; +/// Returns `true` when `SELinux` is enabled (enforcing or permissive). +/// +/// Checks whether selinuxfs is mounted, matching Podman's own detection +/// logic. Bind-mount relabeling (the `z` mount option) is needed in both +/// enforcing and permissive modes: enforcing blocks access outright, while +/// permissive floods the audit log with AVC denials that mask real issues. +/// +/// On non-`SELinux` systems (Ubuntu, macOS, Alpine) the directory does not +/// exist and this returns `false`, leaving mount options unchanged. +#[cfg(target_os = "linux")] +fn is_selinux_enabled() -> bool { + std::path::Path::new("/sys/fs/selinux").is_dir() +} + +#[cfg(not(target_os = "linux"))] +fn is_selinux_enabled() -> bool { + false +} + /// Label key for the sandbox ID. pub const LABEL_SANDBOX_ID: &str = "openshell.sandbox-id"; /// Label key for the sandbox name. 
@@ -25,6 +44,11 @@ const CONTAINER_PREFIX: &str = "openshell-sandbox-"; /// Volume name prefix. const VOLUME_PREFIX: &str = "openshell-sandbox-"; +/// Container-side mount paths for client TLS materials. +const TLS_CA_MOUNT_PATH: &str = "/etc/openshell/tls/client/ca.crt"; +const TLS_CERT_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.crt"; +const TLS_KEY_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.key"; + /// Build a Podman container name from the sandbox name. #[must_use] pub fn container_name(sandbox_name: &str) -> String { @@ -260,6 +284,15 @@ fn build_env( env.insert("OPENSHELL_CONTAINER_IMAGE".into(), image.to_string()); env.insert("OPENSHELL_SANDBOX_COMMAND".into(), "sleep infinity".into()); + // 3. TLS client cert paths (when mTLS is enabled). These point to + // the container-side mount paths where the cert files are + // bind-mounted from the host. + if config.tls_enabled() { + env.insert("OPENSHELL_TLS_CA".into(), TLS_CA_MOUNT_PATH.into()); + env.insert("OPENSHELL_TLS_CERT".into(), TLS_CERT_MOUNT_PATH.into()); + env.insert("OPENSHELL_TLS_KEY".into(), TLS_KEY_MOUNT_PATH.into()); + } + env } @@ -462,12 +495,51 @@ pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi // directory does not exist on the host, so the mkdir inside the container // fails with EPERM. A private tmpfs gives the supervisor its own writable // /run/netns without needing host filesystem access. - mounts: vec![Mount { - kind: "tmpfs".into(), - source: "tmpfs".into(), - destination: "/run/netns".into(), - options: vec!["rw".into(), "nosuid".into(), "nodev".into()], - }], + mounts: { + let mut m = vec![Mount { + kind: "tmpfs".into(), + source: "tmpfs".into(), + destination: "/run/netns".into(), + options: vec!["rw".into(), "nosuid".into(), "nodev".into()], + }]; + // Bind-mount client TLS materials into the container when mTLS + // is enabled. 
The supervisor reads these via OPENSHELL_TLS_CA, + // OPENSHELL_TLS_CERT, and OPENSHELL_TLS_KEY env vars (set in + // build_env above) to establish an mTLS connection back to the + // gateway. + if let (Some(ca), Some(cert), Some(key)) = ( + &config.guest_tls_ca, + &config.guest_tls_cert, + &config.guest_tls_key, + ) { + let mut ro = vec!["ro".into(), "rbind".into()]; + // On SELinux-enabled systems (Fedora, RHEL), bind-mounted + // files need the shared relabel option so the container + // process can read them through the SELinux MAC policy. + if is_selinux_enabled() { + ro.push("z".into()); + } + m.push(Mount { + kind: "bind".into(), + source: ca.display().to_string(), + destination: TLS_CA_MOUNT_PATH.into(), + options: ro.clone(), + }); + m.push(Mount { + kind: "bind".into(), + source: cert.display().to_string(), + destination: TLS_CERT_MOUNT_PATH.into(), + options: ro.clone(), + }); + m.push(Mount { + kind: "bind".into(), + source: key.display().to_string(), + destination: TLS_KEY_MOUNT_PATH.into(), + options: ro, + }); + } + m + }, // Publish the SSH port with host_port=0 to get an ephemeral host port. // In rootless Podman the bridge network (10.89.x.x) is not routable from // the host, so we must use the published host port on 127.0.0.1 instead. @@ -834,4 +906,91 @@ mod tests { "image volume should be read-only" ); } + + #[test] + fn container_spec_includes_tls_mounts_when_configured() { + let sandbox = test_sandbox("tls-id", "tls-name"); + let mut config = test_config(); + config.guest_tls_ca = Some(std::path::PathBuf::from("/host/ca.crt")); + config.guest_tls_cert = Some(std::path::PathBuf::from("/host/tls.crt")); + config.guest_tls_key = Some(std::path::PathBuf::from("/host/tls.key")); + + let spec = build_container_spec(&sandbox, &config); + + // Verify TLS env vars are set. 
+ let env_map = spec["env"].as_object().expect("env should be an object"); + assert_eq!( + env_map.get("OPENSHELL_TLS_CA").and_then(|v| v.as_str()), + Some("/etc/openshell/tls/client/ca.crt"), + ); + assert_eq!( + env_map.get("OPENSHELL_TLS_CERT").and_then(|v| v.as_str()), + Some("/etc/openshell/tls/client/tls.crt"), + ); + assert_eq!( + env_map.get("OPENSHELL_TLS_KEY").and_then(|v| v.as_str()), + Some("/etc/openshell/tls/client/tls.key"), + ); + + // Verify bind mounts exist for all three cert files. + let mounts = spec["mounts"] + .as_array() + .expect("mounts should be an array"); + let bind_dests: Vec<&str> = mounts + .iter() + .filter(|m| m["type"].as_str() == Some("bind")) + .filter_map(|m| m["destination"].as_str()) + .collect(); + assert!( + bind_dests.contains(&"/etc/openshell/tls/client/ca.crt"), + "should bind-mount CA cert" + ); + assert!( + bind_dests.contains(&"/etc/openshell/tls/client/tls.crt"), + "should bind-mount client cert" + ); + assert!( + bind_dests.contains(&"/etc/openshell/tls/client/tls.key"), + "should bind-mount client key" + ); + + // Verify SELinux relabel option is present iff SELinux is enabled. 
+ let tls_binds: Vec<&Value> = mounts + .iter() + .filter(|m| m["type"].as_str() == Some("bind")) + .collect(); + let has_z = tls_binds.iter().all(|m| { + m["options"] + .as_array() + .is_some_and(|opts| opts.iter().any(|o| o.as_str() == Some("z"))) + }); + assert_eq!( + has_z, + is_selinux_enabled(), + "TLS bind mounts should include 'z' option iff SELinux is enabled" + ); + } + + #[test] + fn container_spec_omits_tls_without_config() { + let sandbox = test_sandbox("notls-id", "notls-name"); + let config = test_config(); + + let spec = build_container_spec(&sandbox, &config); + + let env_map = spec["env"].as_object().expect("env should be an object"); + assert!( + env_map.get("OPENSHELL_TLS_CA").is_none(), + "TLS env vars should not be set without TLS config" + ); + + let mounts = spec["mounts"] + .as_array() + .expect("mounts should be an array"); + let bind_count = mounts + .iter() + .filter(|m| m["type"].as_str() == Some("bind")) + .count(); + assert_eq!(bind_count, 0, "no bind mounts without TLS config"); + } } diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index ae9492d74..523c046b4 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -78,7 +78,7 @@ impl PodmanComputeDriver { // Verify connectivity. client.ping().await?; - // Verify cgroups v2 and log system info. + // Verify cgroups v2, detect rootless mode, and log system info. match client.system_info().await { Ok(info) => { if info.host.cgroup_version != "v2" { @@ -92,6 +92,7 @@ impl PodmanComputeDriver { info!( cgroup_version = %info.host.cgroup_version, network_backend = %info.host.network_backend, + rootless = info.host.security.rootless, "Connected to Podman" ); } @@ -124,16 +125,24 @@ impl PodmanComputeDriver { // Auto-detect the gRPC callback endpoint when not explicitly // configured. 
Sandbox containers use host.containers.internal // (injected via hostadd with host-gateway in the container spec) - // to reach the gateway server on the host. This works in both - // rootful and rootless Podman — the bridge gateway IP does NOT - // work in rootless mode because it lives inside the user - // namespace, not on the host. + // to reach the gateway server on the host. The scheme is + // determined by whether TLS client certs are configured: when + // all three TLS paths are set, the endpoint uses https so the + // supervisor connects with mTLS. if config.grpc_endpoint.is_empty() { - config.grpc_endpoint = - format!("http://host.containers.internal:{}", config.gateway_port); + let scheme = if config.tls_enabled() { + "https" + } else { + "http" + }; + config.grpc_endpoint = format!( + "{scheme}://host.containers.internal:{}", + config.gateway_port + ); info!( grpc_endpoint = %config.grpc_endpoint, - "Auto-detected gRPC endpoint via host.containers.internal" + tls = config.tls_enabled(), + "Auto-detected gRPC endpoint" ); } @@ -557,51 +566,67 @@ mod tests { assert!(matches!(err, ComputeDriverError::Message(_))); } - // ── gateway_port / grpc_endpoint auto-detection ─────────────────────── + // ── grpc_endpoint auto-detection ─────────────────────────────────── // // PodmanComputeDriver::new() fills grpc_endpoint when it is empty. - // These tests use for_tests() (which skips the Podman socket handshake) - // to verify the endpoint that ends up in the config — and therefore in - // OPENSHELL_ENDPOINT inside every sandbox container. + // The scheme (http vs https) depends on whether TLS client certs are + // configured. These tests simulate the auto-detection logic. 
#[test] - fn grpc_endpoint_auto_detected_from_gateway_port() { - let config = PodmanComputeConfig { + fn grpc_endpoint_http_without_tls() { + let mut cfg = PodmanComputeConfig { gateway_port: 8081, ..PodmanComputeConfig::default() }; - // Simulate what new() does once the socket/network checks pass. - let mut cfg = config; if cfg.grpc_endpoint.is_empty() { - cfg.grpc_endpoint = format!("http://host.containers.internal:{}", cfg.gateway_port); + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); } assert_eq!(cfg.grpc_endpoint, "http://host.containers.internal:8081"); } #[test] - fn grpc_endpoint_auto_detected_uses_default_port_when_gateway_port_is_default() { - let config = PodmanComputeConfig::default(); - assert_eq!( - config.gateway_port, - openshell_core::config::DEFAULT_SERVER_PORT - ); - let mut cfg = config; + fn grpc_endpoint_https_with_tls() { + let mut cfg = PodmanComputeConfig { + gateway_port: 8080, + guest_tls_ca: Some(PathBuf::from("/tls/ca.crt")), + guest_tls_cert: Some(PathBuf::from("/tls/tls.crt")), + guest_tls_key: Some(PathBuf::from("/tls/tls.key")), + ..PodmanComputeConfig::default() + }; if cfg.grpc_endpoint.is_empty() { - cfg.grpc_endpoint = format!("http://host.containers.internal:{}", cfg.gateway_port); + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); + } + assert_eq!(cfg.grpc_endpoint, "https://host.containers.internal:8080"); + } + + #[test] + fn grpc_endpoint_partial_tls_falls_back_to_http() { + let mut cfg = PodmanComputeConfig { + gateway_port: 8080, + guest_tls_ca: Some(PathBuf::from("/tls/ca.crt")), + // guest_tls_cert and guest_tls_key not set — incomplete TLS config. 
+ ..PodmanComputeConfig::default() + }; + assert!(!cfg.tls_enabled()); + if cfg.grpc_endpoint.is_empty() { + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); } assert_eq!(cfg.grpc_endpoint, "http://host.containers.internal:8080"); } #[test] - fn explicit_grpc_endpoint_takes_precedence_over_gateway_port() { - let config = PodmanComputeConfig { + fn explicit_grpc_endpoint_takes_precedence() { + let mut cfg = PodmanComputeConfig { grpc_endpoint: "https://gateway.internal:9000".to_string(), gateway_port: 8081, ..PodmanComputeConfig::default() }; - let mut cfg = config; if cfg.grpc_endpoint.is_empty() { - cfg.grpc_endpoint = format!("http://host.containers.internal:{}", cfg.gateway_port); + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); } assert_eq!(cfg.grpc_endpoint, "https://gateway.internal:9000"); } diff --git a/crates/openshell-driver-podman/src/main.rs b/crates/openshell-driver-podman/src/main.rs index 6020de5bd..25cb5912f 100644 --- a/crates/openshell-driver-podman/src/main.rs +++ b/crates/openshell-driver-podman/src/main.rs @@ -86,6 +86,18 @@ struct Args { /// OCI image containing the openshell-sandbox supervisor binary. #[arg(long, env = "OPENSHELL_SUPERVISOR_IMAGE")] supervisor_image: String, + + /// Host path to the CA certificate for sandbox mTLS. + #[arg(long, env = "OPENSHELL_PODMAN_TLS_CA")] + podman_tls_ca: Option, + + /// Host path to the client certificate for sandbox mTLS. + #[arg(long, env = "OPENSHELL_PODMAN_TLS_CERT")] + podman_tls_cert: Option, + + /// Host path to the client private key for sandbox mTLS. 
+ #[arg(long, env = "OPENSHELL_PODMAN_TLS_KEY")] + podman_tls_key: Option, } #[tokio::main] @@ -115,6 +127,9 @@ async fn main() -> Result<()> { ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, stop_timeout_secs: args.stop_timeout, supervisor_image: args.supervisor_image, + guest_tls_ca: args.podman_tls_ca, + guest_tls_cert: args.podman_tls_cert, + guest_tls_key: args.podman_tls_key, }) .await .into_diagnostic()?; diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index ae90c8b34..040cb48ad 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -332,7 +332,7 @@ async fn run_from_args(args: Args) -> Result<()> { args.port )); } - let health_bind = SocketAddr::from(([0, 0, 0, 0], args.health_port)); + let health_bind = SocketAddr::from((args.bind_address, args.health_port)); config = config.with_health_bind_address(health_bind); } @@ -349,7 +349,7 @@ async fn run_from_args(args: Args) -> Result<()> { args.health_port )); } - let metrics_bind = SocketAddr::from(([0, 0, 0, 0], args.metrics_port)); + let metrics_bind = SocketAddr::from((args.bind_address, args.metrics_port)); config = config.with_metrics_bind_address(metrics_bind); } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index f37ba472f..cad79eb56 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -505,6 +505,22 @@ async fn build_compute_runtime( .filter(|s| !s.is_empty()) .unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()); + // TLS client cert paths for sandbox mTLS. When all three are + // set, the Podman driver bind-mounts them into sandbox + // containers and switches the endpoint to https://. 
+ let podman_tls_ca = std::env::var("OPENSHELL_PODMAN_TLS_CA") + .ok() + .filter(|s| !s.is_empty()) + .map(std::path::PathBuf::from); + let podman_tls_cert = std::env::var("OPENSHELL_PODMAN_TLS_CERT") + .ok() + .filter(|s| !s.is_empty()) + .map(std::path::PathBuf::from); + let podman_tls_key = std::env::var("OPENSHELL_PODMAN_TLS_KEY") + .ok() + .filter(|s| !s.is_empty()) + .map(std::path::PathBuf::from); + ComputeRuntime::new_podman( openshell_driver_podman::PodmanComputeConfig { socket_path, @@ -520,6 +536,9 @@ async fn build_compute_runtime( ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, stop_timeout_secs, supervisor_image, + guest_tls_ca: podman_tls_ca, + guest_tls_cert: podman_tls_cert, + guest_tls_key: podman_tls_key, }, store, sandbox_index, diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index ba5d64663..2f8280b9c 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -1727,6 +1727,7 @@ fn bootstrap_gateway(rootfs: &Path, gateway_name: &str, gateway_port: u16) -> Re name: gateway_name.to_string(), gateway_endpoint: format!("https://127.0.0.1:{gateway_port}"), gateway_port, + client_lifecycle_managed: Some(true), ..Default::default() }; diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images index ebe5e267e..ab1c7f989 100644 --- a/deploy/docker/Dockerfile.images +++ b/deploy/docker/Dockerfile.images @@ -7,9 +7,8 @@ # # Targets: # gateway Final gateway image -# supervisor Final supervisor image +# supervisor Final supervisor image (FROM scratch, binary only) # cluster Final cluster image -# supervisor-output Minimal stage exporting only the supervisor binary # # Rust binaries are built natively before the image build and staged at: # deploy/docker/.build/prebuilt-binaries//openshell-{gateway,sandbox} @@ -38,10 +37,7 @@ ARG TARGETARCH # download-artifact, which strip exec perms during the roundtrip. 
COPY --chmod=755 deploy/docker/.build/prebuilt-binaries/${TARGETARCH}/openshell-sandbox /build/out/openshell-sandbox -# Minimal extraction stage for fast-deploy: exports only the supervisor -# binary (~20-40 MB) instead of the entire build environment (~968 MB). -FROM scratch AS supervisor-output -COPY --from=supervisor-binary /build/out/openshell-sandbox /openshell-sandbox + # --------------------------------------------------------------------------- # Final gateway image @@ -71,22 +67,11 @@ CMD ["--port", "8080"] # --------------------------------------------------------------------------- # Final supervisor image # --------------------------------------------------------------------------- -FROM nvcr.io/nvidia/base/ubuntu:noble-20251013 AS supervisor - -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates && \ - apt-get install -y --only-upgrade gpgv && \ - rm -rf /var/lib/apt/lists/* - -RUN useradd --create-home --user-group openshell - -WORKDIR /app - -COPY --from=supervisor-binary /build/out/openshell-sandbox /usr/local/bin/ - -USER openshell - -ENTRYPOINT ["openshell-sandbox"] +# Minimal FROM scratch image containing only the supervisor binary. +# Used by both the Docker driver (binary extraction) and the Podman driver +# (OCI image volume mount at /opt/openshell/bin). 
+FROM scratch AS supervisor +COPY --from=supervisor-binary /build/out/openshell-sandbox /openshell-sandbox # --------------------------------------------------------------------------- # Cluster asset stages diff --git a/deploy/man/openshell-gateway.8.md b/deploy/man/openshell-gateway.8.md new file mode 100644 index 000000000..f551e0c11 --- /dev/null +++ b/deploy/man/openshell-gateway.8.md @@ -0,0 +1,207 @@ +--- +title: OPENSHELL-GATEWAY +section: 8 +header: OpenShell Manual +footer: openshell-gateway +date: 2025 +--- + +# NAME + +openshell-gateway - OpenShell gateway server daemon + +# SYNOPSIS + +**openshell-gateway** \[*OPTIONS*\] + +# DESCRIPTION + +**openshell-gateway** is the control-plane server for OpenShell. It +manages sandbox lifecycle, stores provider credentials, delivers +network and filesystem policies to sandboxes, routes inference +requests, and provides the SSH tunnel endpoint for CLI-to-sandbox +connections. + +When installed via RPM, the gateway runs as a systemd user service +with the Podman compute driver. Sandboxes are rootless Podman +containers on the host. + +The gateway exposes a single port (default 8080) with multiplexed +gRPC and HTTP, secured by mutual TLS (mTLS) by default. + +# OPTIONS + +**--host** *IP* +: IP address to bind all listeners to. Default: **0.0.0.0**. + Environment: **OPENSHELL_BIND_HOST**. + +**--port** *PORT* +: Port for the gRPC/HTTP API. Default: **8080**. + Environment: **OPENSHELL_SERVER_PORT**. + +**--health-port** *PORT* +: Port for unauthenticated health endpoints (/healthz, /readyz). + Set to 0 to disable. Default: **0**. + Environment: **OPENSHELL_HEALTH_PORT**. + +**--metrics-port** *PORT* +: Port for Prometheus metrics (/metrics). Set to 0 to disable. + Default: **0**. Environment: **OPENSHELL_METRICS_PORT**. + +**--log-level** *LEVEL* +: Log level: trace, debug, info, warn, error. Default: **info**. + Environment: **OPENSHELL_LOG_LEVEL**. + +**--db-url** *URL* +: SQLite database URL for state persistence. 
Required. + Environment: **OPENSHELL_DB_URL**. + +**--drivers** *DRIVER*\[,*DRIVER*\] +: Compute driver. Accepts a comma-delimited list. The gateway + currently requires exactly one driver. Options: **podman**, + **docker**, **kubernetes**. Default: **kubernetes**. + Environment: **OPENSHELL_DRIVERS**. + +**--tls-cert** *PATH* +: Path to server TLS certificate file. Required unless + **--disable-tls** is set. Environment: **OPENSHELL_TLS_CERT**. + +**--tls-key** *PATH* +: Path to server TLS private key file. Required unless + **--disable-tls** is set. Environment: **OPENSHELL_TLS_KEY**. + +**--tls-client-ca** *PATH* +: Path to CA certificate for client certificate verification (mTLS). + Required unless **--disable-tls** is set. + Environment: **OPENSHELL_TLS_CLIENT_CA**. + +**--disable-tls** +: Disable TLS entirely and listen on plaintext HTTP. Use when the + gateway sits behind a TLS-terminating reverse proxy. + Environment: **OPENSHELL_DISABLE_TLS**. + +**--disable-gateway-auth** +: Disable mTLS client certificate requirement. The TLS handshake + accepts connections without a client certificate. Ignored when + **--disable-tls** is set. + Environment: **OPENSHELL_DISABLE_GATEWAY_AUTH**. + +**--sandbox-image** *IMAGE* +: Default container image for sandboxes. + Environment: **OPENSHELL_SANDBOX_IMAGE**. + +**--sandbox-image-pull-policy** *POLICY* +: Image pull policy: Always, IfNotPresent, Never. + Environment: **OPENSHELL_SANDBOX_IMAGE_PULL_POLICY**. + +**--ssh-handshake-secret** *SECRET* +: Shared secret for gateway-to-sandbox SSH handshake. + Environment: **OPENSHELL_SSH_HANDSHAKE_SECRET**. + +**--ssh-handshake-skew-secs** *SECONDS* +: Allowed clock skew in seconds for SSH handshake. Default: **30**. + Environment: **OPENSHELL_SSH_HANDSHAKE_SKEW_SECS**. + +**--ssh-gateway-host** *HOST* +: Public host for the SSH gateway endpoint. Default: **127.0.0.1**. + Environment: **OPENSHELL_SSH_GATEWAY_HOST**. 
+ +**--ssh-gateway-port** *PORT* +: Public port for the SSH gateway endpoint. Default: **8080**. + Environment: **OPENSHELL_SSH_GATEWAY_PORT**. + +**--grpc-endpoint** *URL* +: gRPC endpoint for sandbox callbacks. Should be reachable from + within sandbox containers. + Environment: **OPENSHELL_GRPC_ENDPOINT**. + +# SYSTEMD INTEGRATION + +The RPM installs a systemd user unit at +*/usr/lib/systemd/user/openshell-gateway.service*. Manage the gateway +with standard systemd commands: + + systemctl --user enable --now openshell-gateway + systemctl --user status openshell-gateway + systemctl --user restart openshell-gateway + systemctl --user stop openshell-gateway + +View logs: + + journalctl --user -u openshell-gateway + journalctl --user -u openshell-gateway -f + +The unit runs two **ExecStartPre** scripts before every start: + +1. **init-pki.sh** generates a self-signed PKI bundle for mTLS. +2. **init-gateway-env.sh** generates the environment configuration + file with an auto-generated SSH handshake secret. + +Both scripts are idempotent and skip generation if their output files +already exist. + +To persist the service across logouts: + + sudo loginctl enable-linger $USER + +# CONFIGURATION + +The systemd user unit reads configuration from +*~/.config/openshell/gateway.env*. See **openshell-gateway.env**(5) +for the full variable reference. + +To override individual settings without modifying gateway.env: + + systemctl --user edit openshell-gateway + +This creates a drop-in override that persists across package upgrades. + +# FILES + +*/usr/bin/openshell-gateway* +: Gateway binary. + +*/usr/lib/systemd/user/openshell-gateway.service* +: Systemd user unit file. + +*/usr/libexec/openshell/init-pki.sh* +: PKI bootstrap script. + +*/usr/libexec/openshell/init-gateway-env.sh* +: Gateway environment file generator. + +*~/.config/openshell/gateway.env* +: Gateway environment configuration (generated on first start).
+ +*~/.local/state/openshell/tls/* +: Auto-generated TLS certificates. + +*~/.local/state/openshell/gateway.db* +: SQLite database for gateway state. + +*~/.config/openshell/gateways/openshell/mtls/* +: Client mTLS certificates for CLI auto-discovery. + +# EXAMPLES + +Start the gateway as a systemd user service: + + systemctl --user enable --now openshell-gateway + +Check gateway health from the CLI: + + openshell gateway add --local https://127.0.0.1:8080 + openshell status + +Override the API port via a systemd drop-in: + + systemctl --user edit openshell-gateway + # Add: [Service] + # Add: Environment=OPENSHELL_SERVER_PORT=9090 + +# SEE ALSO + +**openshell**(1), **openshell-gateway.env**(5), **systemctl**(1), +**journalctl**(1), **loginctl**(1), **podman**(1) + +Full documentation: *https://docs.nvidia.com/openshell/* diff --git a/deploy/man/openshell-gateway.env.5.md b/deploy/man/openshell-gateway.env.5.md new file mode 100644 index 000000000..7e6da7cb6 --- /dev/null +++ b/deploy/man/openshell-gateway.env.5.md @@ -0,0 +1,161 @@ +--- +title: OPENSHELL-GATEWAY.ENV +section: 5 +header: OpenShell Manual +footer: openshell-gateway +date: 2025 +--- + +# NAME + +openshell-gateway.env - OpenShell gateway environment configuration + +# DESCRIPTION + +The **openshell-gateway.env** file contains environment variables that +configure the OpenShell gateway server when running as a systemd user +service. It is generated automatically on first start by +**init-gateway-env.sh** and is not overwritten on subsequent starts or +package upgrades. + +The file uses the standard systemd **EnvironmentFile** format: one +**KEY=VALUE** pair per line. Lines beginning with **#** are comments. +Shell variable expansion is not performed. 
+ +# LOCATION + +The file is located at: + + ~/.config/openshell/gateway.env + +The systemd user unit reads it via: + + EnvironmentFile=-~/.config/openshell/gateway.env + +The **-** prefix means the service starts normally if the file does not +exist (the unit has built-in defaults for all required settings except +the SSH handshake secret). + +# VARIABLES + +## Required + +**OPENSHELL_SSH_HANDSHAKE_SECRET** +: Shared HMAC secret for gateway-to-sandbox SSH handshake + authentication. Auto-generated as 32 random bytes (64 hex characters) + on first start. To regenerate: **openssl rand -hex 32**. + +## Gateway + +**OPENSHELL_BIND_HOST** (default: 0.0.0.0) +: IP address to bind all listeners to. + +**OPENSHELL_SERVER_PORT** (default: 8080) +: Port for the multiplexed gRPC/HTTP API. + +**OPENSHELL_HEALTH_PORT** (default: 0) +: Port for unauthenticated health endpoints (/healthz, /readyz). + Set to a non-zero value to enable a dedicated health listener. + +**OPENSHELL_METRICS_PORT** (default: 0) +: Port for Prometheus metrics endpoint (/metrics). Set to a + non-zero value to enable a dedicated metrics listener. + +**OPENSHELL_LOG_LEVEL** (default: info) +: Log verbosity: **trace**, **debug**, **info**, **warn**, **error**. + +**OPENSHELL_DRIVERS** (default: podman) +: Compute driver for sandbox management. Options: **podman**, + **docker**, **kubernetes**. The RPM unit defaults to **podman**. + +**OPENSHELL_DB_URL** (default: sqlite://$XDG_STATE_HOME/openshell/gateway.db) +: SQLite database URL for gateway state persistence. + +**OPENSHELL_DISABLE_GATEWAY_AUTH** (default: unset) +: Set to **true** to disable mTLS client certificate verification. + +## TLS + +**OPENSHELL_TLS_CERT** (default: auto-generated path) +: Path to server TLS certificate. + +**OPENSHELL_TLS_KEY** (default: auto-generated path) +: Path to server TLS private key. + +**OPENSHELL_TLS_CLIENT_CA** (default: auto-generated path) +: Path to CA certificate for client certificate verification.
+ +**OPENSHELL_DISABLE_TLS** (default: unset) +: Set to **true** to disable TLS entirely and listen on plaintext + HTTP. Not recommended for production. + +**OPENSHELL_PODMAN_TLS_CA** (default: auto-generated path) +: CA certificate bind-mounted into sandbox containers. + +**OPENSHELL_PODMAN_TLS_CERT** (default: auto-generated path) +: Client certificate bind-mounted into sandbox containers. + +**OPENSHELL_PODMAN_TLS_KEY** (default: auto-generated path) +: Client private key bind-mounted into sandbox containers. + +## Images + +**OPENSHELL_SUPERVISOR_IMAGE** (default: ghcr.io/nvidia/openshell/supervisor:latest) +: OCI image containing the supervisor binary, mounted read-only + into sandbox containers. + +**OPENSHELL_SANDBOX_IMAGE** (default: ghcr.io/nvidia/openshell-community/sandboxes/base:latest) +: Default OCI image for sandbox containers. + +**OPENSHELL_SANDBOX_IMAGE_PULL_POLICY** (default: missing) +: When to pull sandbox images: **always** (every sandbox creation), + **missing** (only if not cached locally), **never** (use cached + only), **newer** (pull if a newer version exists). + +## Podman Driver + +**OPENSHELL_PODMAN_SOCKET** (default: $XDG_RUNTIME_DIR/podman/podman.sock) +: Path to the Podman API Unix socket. + +**OPENSHELL_NETWORK_NAME** (default: openshell) +: Name of the Podman bridge network for sandbox containers. Created + automatically if it does not exist. + +**OPENSHELL_STOP_TIMEOUT** (default: 10) +: Seconds to wait after SIGTERM before sending SIGKILL when stopping + a sandbox container. 
+ +# EXAMPLES + +Change the API port to 9090: + + OPENSHELL_SERVER_PORT=9090 + +Pin sandbox images to a specific version: + + OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:v0.0.37 + OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:v0.0.37 + +Air-gapped deployment (pre-loaded images, no registry access): + + OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never + +Enable debug logging: + + OPENSHELL_LOG_LEVEL=debug + +Use externally-managed TLS certificates: + + OPENSHELL_TLS_CERT=/etc/pki/tls/certs/openshell.crt + OPENSHELL_TLS_KEY=/etc/pki/tls/private/openshell.key + OPENSHELL_TLS_CLIENT_CA=/etc/pki/tls/certs/openshell-ca.crt + +Disable TLS (behind a reverse proxy): + + OPENSHELL_DISABLE_TLS=true + +# SEE ALSO + +**openshell-gateway**(8), **openshell**(1), **systemd.exec**(5) + +Full documentation: *https://docs.nvidia.com/openshell/* diff --git a/deploy/man/openshell.1.md b/deploy/man/openshell.1.md new file mode 100644 index 000000000..65e060899 --- /dev/null +++ b/deploy/man/openshell.1.md @@ -0,0 +1,217 @@ +--- +title: OPENSHELL +section: 1 +header: OpenShell Manual +footer: openshell +date: 2025 +--- + +# NAME + +openshell - CLI for managing OpenShell sandboxes, gateways, and providers + +# SYNOPSIS + +**openshell** \[*OPTIONS*\] *COMMAND* \[*ARGS*\] + +# DESCRIPTION + +**openshell** is the command-line interface for OpenShell, a platform +providing safe, sandboxed runtimes for autonomous AI agents. It manages +the gateway control plane, sandbox lifecycle, credential providers, +network policies, and inference routing. + +The CLI communicates with a gateway server over gRPC. The gateway can +run as a systemd user service (RPM deployment with Podman driver), a +Docker container with embedded K3s, or behind a cloud reverse proxy. + +# COMMANDS + +## Gateway Management + +**gateway start** +: Deploy a new gateway using Docker (not applicable to RPM deployments; + use **systemctl --user start openshell-gateway** instead). 
+ +**gateway stop** +: Stop a Docker-managed gateway (use **systemctl --user stop + openshell-gateway** for RPM deployments). + +**gateway destroy** \[**--name** *NAME*\] +: Destroy a gateway. For RPM deployments, this removes the CLI + registration only. + +**gateway add** *ENDPOINT* \[**--local**\] \[**--name** *NAME*\] \[**--remote** *USER@HOST*\] +: Register an existing gateway with the CLI. + +**gateway select** \[*NAME*\] +: List registered gateways or switch the active gateway. + +**gateway info** \[**--name** *NAME*\] +: Show deployment details for a gateway. + +**gateway login** +: Re-authenticate with a cloud gateway. + +**status** +: Check the health of the active gateway. + +## Sandbox Management + +**sandbox create** \[**--from** *IMAGE*\] \[**--policy** *FILE*\] \[**--provider** *NAME*\] \[**--gpu**\] \[**--upload** *SRC:DST*\] \[**--forward** *PORT*\] \[**--** *COMMAND*\] +: Create a new sandbox. If no gateway exists, auto-bootstraps one + (Docker mode only). + +**sandbox list** \[**--selector** *LABEL*\] +: List all sandboxes on the active gateway. + +**sandbox get** *NAME* +: Show details for a sandbox. + +**sandbox delete** *NAME* \| **--all** +: Delete one or all sandboxes. + +**sandbox connect** *NAME* \[**--editor** *EDITOR*\] +: SSH into a running sandbox. + +**sandbox exec** **-n** *NAME* \[**--workdir** *DIR*\] **--** *COMMAND* +: Execute a command in a sandbox. + +**sandbox upload** *NAME* *LOCAL* *REMOTE* +: Upload files to a sandbox. + +**sandbox download** *NAME* *REMOTE* *LOCAL* +: Download files from a sandbox. + +## Policy Management + +**policy get** *SANDBOX* \[**--full**\] +: Show the active policy for a sandbox. + +**policy set** *SANDBOX* **--policy** *FILE* \[**--wait**\] +: Apply a policy to a sandbox. + +**policy update** *SANDBOX* \[**--add-endpoint** *SPEC*\] \[**--add-allow** *RULE*\] +: Incrementally update a sandbox policy. + +**policy list** *SANDBOX* +: Show policy revision history. 
+ +**policy prove** **--policy** *FILE* \[**--credentials** *FILE*\] +: Verify policy properties. + +## Provider Management + +**provider create** **--name** *NAME* **--type** *TYPE* \[**--from-existing**\] \[**--credential** *KEY=VALUE*\] +: Create a credential provider. + +**provider list** +: List all providers. + +**provider get** *NAME* +: Show provider details. + +**provider update** *NAME* \[**--from-existing**\] \[**--credential** *KEY=VALUE*\] +: Update provider credentials. + +**provider delete** *NAME* +: Delete a provider. + +## Inference Routing + +**inference set** **--provider** *NAME* **--model** *MODEL* +: Configure inference routing. + +**inference get** +: Show current inference configuration. + +**inference update** \[**--model** *MODEL*\] +: Update inference configuration. + +## Other + +**logs** *SANDBOX* \[**--tail**\] +: View sandbox logs. + +**forward start** *PORT* *SANDBOX* \[**-d**\] +: Start port forwarding to a sandbox. + +**forward stop** *PORT* +: Stop port forwarding. + +**forward list** +: List active port forwards. + +**term** +: Open the real-time TUI dashboard. + +**doctor check** \| **logs** \| **exec** \| **llm.txt** +: Diagnostic tools (Docker/K3s mode only; see **TROUBLESHOOTING** + section for RPM alternatives). + +**completions** *SHELL* +: Generate shell completions (bash, zsh, fish). + +# GLOBAL OPTIONS + +**-g**, **--gateway** *NAME* +: Target a specific gateway by name. + +**--gateway-endpoint** *URL* +: Connect to a gateway by URL directly. + +**-h**, **--help** +: Print help information. + +**-V**, **--version** +: Print version. + +# ENVIRONMENT + +**OPENSHELL_GATEWAY** +: Default gateway name (overrides active gateway). + +**OPENSHELL_GATEWAY_ENDPOINT** +: Direct gateway URL (bypasses metadata lookup). + +**ANTHROPIC_API_KEY**, **OPENAI_API_KEY**, **OPENROUTER_API_KEY** +: API keys discovered by auto-provider creation. + +**GITHUB_TOKEN**, **GH_TOKEN** +: GitHub credentials for provider auto-discovery. 
+ +# FILES + +*~/.config/openshell/gateways/* +: Gateway metadata and mTLS certificates. + +*~/.config/openshell/active_gateway* +: Name of the currently active gateway. + +# EXAMPLES + +Register the local RPM gateway and create a sandbox: + + openshell gateway add --local https://127.0.0.1:8080 + openshell sandbox create -- claude + +List sandboxes and connect to one: + + openshell sandbox list + openshell sandbox connect my-sandbox + +Create a provider from a local environment variable: + + openshell provider create --name openai --type openai --from-existing + +Check gateway health: + + openshell status + +# SEE ALSO + +**openshell-gateway**(8), **openshell-gateway.env**(5) + +Full documentation: *https://docs.nvidia.com/openshell/* + +Run **openshell** *COMMAND* **--help** for detailed help on any command. diff --git a/deploy/rpm/CONFIGURATION.md b/deploy/rpm/CONFIGURATION.md new file mode 100644 index 000000000..724d283de --- /dev/null +++ b/deploy/rpm/CONFIGURATION.md @@ -0,0 +1,247 @@ +# OpenShell Gateway Configuration (RPM) + +Configuration reference for the OpenShell gateway when installed via +the RPM package on Fedora and RHEL systems. + +For first-time setup, see QUICKSTART.md. For troubleshooting, see +TROUBLESHOOTING.md. + +## TLS (mTLS) + +The RPM enables mutual TLS by default. The gateway requires a valid +client certificate for all API connections, protecting the API even +though it listens on all interfaces (`0.0.0.0`). 
+ +### Auto-generated certificates + +On first start, the `init-pki.sh` script generates certificates using +OpenSSL: + +| File | Purpose | Location | +|------|---------|----------| +| CA certificate | Root of trust | `~/.local/state/openshell/tls/ca.crt` | +| CA private key | Signs server and client certs | `~/.local/state/openshell/tls/ca.key` | +| Server certificate | Gateway TLS identity | `~/.local/state/openshell/tls/server/tls.crt` | +| Server private key | Gateway TLS key | `~/.local/state/openshell/tls/server/tls.key` | +| Client certificate | CLI and sandbox identity | `~/.local/state/openshell/tls/client/tls.crt` | +| Client private key | CLI and sandbox key | `~/.local/state/openshell/tls/client/tls.key` | + +Client certificates are also copied to the CLI auto-discovery directory: + +``` +~/.config/openshell/gateways/openshell/mtls/ + ca.crt + tls.crt + tls.key +``` + +The CLI automatically discovers these certificates when connecting to a +gateway on `localhost` or `127.0.0.1`. + +### Server certificate SANs + +The auto-generated server certificate includes these Subject Alternative +Names: + +- `localhost` +- `openshell` +- `openshell.openshell.svc` +- `openshell.openshell.svc.cluster.local` +- `host.containers.internal` +- `host.docker.internal` +- `127.0.0.1` + +To connect from a remote machine, you need externally-managed +certificates with additional SANs. See "Remote CLI access" in +TROUBLESHOOTING.md. + +### Using externally-managed certificates + +To use certificates from an external CA or cert-manager: + +1. Place the server cert, key, and CA cert on the filesystem. + +1. Edit `~/.config/openshell/gateway.env` or use + `systemctl --user edit openshell-gateway` to override: + + ```shell + OPENSHELL_TLS_CERT=/path/to/server/tls.crt + OPENSHELL_TLS_KEY=/path/to/server/tls.key + OPENSHELL_TLS_CLIENT_CA=/path/to/ca.crt + ``` + +1. 
Place the client cert where the CLI expects it: + + ``` + ~/.config/openshell/gateways/openshell/mtls/ + ca.crt + tls.crt + tls.key + ``` + +### Rotating certificates + +Delete the TLS state directory and restart the gateway: + +```shell +rm -rf ~/.local/state/openshell/tls +systemctl --user restart openshell-gateway +``` + +The gateway regenerates the PKI on next start. + +### Disabling TLS + +To disable TLS (not recommended for production): + +1. Edit `~/.config/openshell/gateway.env`: + + ```shell + OPENSHELL_DISABLE_TLS=true + ``` + +1. Comment out the `OPENSHELL_TLS_*` and `OPENSHELL_PODMAN_TLS_*` + variables if they are set. + +1. Restart the gateway. + +With TLS disabled, the gateway has no authentication. Any host that can +reach the gateway port has full access to the API. + +## Sandbox TLS + +When mTLS is enabled, the Podman driver bind-mounts the client +certificates into each sandbox container so the supervisor process can +establish an mTLS connection back to the gateway. + +The following environment variables control the host-side paths of the +client certificates that are mounted into sandbox containers: + +| Variable | Description | +|----------|-------------| +| `OPENSHELL_PODMAN_TLS_CA` | CA certificate (host path) | +| `OPENSHELL_PODMAN_TLS_CERT` | Client certificate (host path) | +| `OPENSHELL_PODMAN_TLS_KEY` | Client private key (host path) | + +Inside the container, the supervisor reads them from: + +- `/etc/openshell/tls/client/ca.crt` +- `/etc/openshell/tls/client/tls.crt` +- `/etc/openshell/tls/client/tls.key` + +On SELinux-enabled systems, the Podman driver automatically applies the +`:z` relabel option to these bind mounts. No manual SELinux +configuration is required. + +## Configuration reference + +All settings are controlled via environment variables. The user unit +reads from `~/.config/openshell/gateway.env` (generated on first start) +and from `Environment=` directives in the systemd unit. 
+ +Values in `gateway.env` override the unit defaults. Use +`systemctl --user edit openshell-gateway` to add overrides that persist +across package upgrades. + +### Gateway settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENSHELL_BIND_HOST` | `0.0.0.0` | IP address to bind all listeners to | +| `OPENSHELL_SERVER_PORT` | `8080` | Port for the gRPC/HTTP API | +| `OPENSHELL_HEALTH_PORT` | `0` (disabled) | Port for unauthenticated health endpoints (`/healthz`, `/readyz`). Set to a non-zero value to enable. | +| `OPENSHELL_METRICS_PORT` | `0` (disabled) | Port for Prometheus metrics (`/metrics`). Set to a non-zero value to enable. | +| `OPENSHELL_LOG_LEVEL` | `info` | Log level: `trace`, `debug`, `info`, `warn`, `error` | +| `OPENSHELL_DRIVERS` | `podman` | Compute driver (`podman`, `docker`, `kubernetes`) | +| `OPENSHELL_DB_URL` | `sqlite://$XDG_STATE_HOME/openshell/gateway.db` | SQLite database URL for state persistence | +| `OPENSHELL_SSH_HANDSHAKE_SECRET` | (auto-generated) | Shared secret for sandbox SSH authentication | +| `OPENSHELL_DISABLE_GATEWAY_AUTH` | (unset) | Set to `true` to skip mTLS client certificate checks | + +### TLS settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENSHELL_TLS_CERT` | (auto-generated path) | Server TLS certificate | +| `OPENSHELL_TLS_KEY` | (auto-generated path) | Server TLS private key | +| `OPENSHELL_TLS_CLIENT_CA` | (auto-generated path) | CA for client certificate verification | +| `OPENSHELL_DISABLE_TLS` | (unset) | Set to `true` to disable TLS | +| `OPENSHELL_PODMAN_TLS_CA` | (auto-generated path) | CA cert mounted into sandbox containers | +| `OPENSHELL_PODMAN_TLS_CERT` | (auto-generated path) | Client cert mounted into sandbox containers | +| `OPENSHELL_PODMAN_TLS_KEY` | (auto-generated path) | Client key mounted into sandbox containers | + +### Sandbox settings + +| Variable | Default | Description | +|----------|---------|-------------| 
+| `OPENSHELL_SUPERVISOR_IMAGE` | `ghcr.io/nvidia/openshell/supervisor:latest` | Supervisor binary OCI image | +| `OPENSHELL_SANDBOX_IMAGE` | `ghcr.io/nvidia/openshell-community/sandboxes/base:latest` | Default sandbox base image | +| `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY` | `missing` | Image pull policy: `always`, `missing`, `never`, `newer` | + +### Podman driver settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENSHELL_PODMAN_SOCKET` | `$XDG_RUNTIME_DIR/podman/podman.sock` | Podman API Unix socket path | +| `OPENSHELL_NETWORK_NAME` | `openshell` | Podman bridge network name for sandbox containers | +| `OPENSHELL_STOP_TIMEOUT` | `10` | Container stop timeout in seconds (SIGTERM then SIGKILL) | + +### Image management + +The gateway pulls container images automatically on first sandbox +creation. The default pull policy is `missing`, which means images are +pulled once and then cached by Podman. + +To update cached images: + +```shell +podman pull ghcr.io/nvidia/openshell/supervisor:latest +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +``` + +Or set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=always` to pull on every +sandbox creation. + +To pin specific image versions instead of `:latest`: + +```shell +OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:v0.0.37 +OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:v0.0.37 +``` + +For air-gapped environments: + +1. On a connected machine, pull and save the images: + + ```shell + podman pull ghcr.io/nvidia/openshell/supervisor:latest + podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest + podman save -o supervisor.tar ghcr.io/nvidia/openshell/supervisor:latest + podman save -o sandbox.tar ghcr.io/nvidia/openshell-community/sandboxes/base:latest + ``` + +1. Transfer the tarballs to the air-gapped host and load them: + + ```shell + podman load -i supervisor.tar + podman load -i sandbox.tar + ``` + +1. 
Set pull policy to `never`: + + ```shell + OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never + ``` + +## File locations + +| Purpose | Path | +|---------|------| +| Gateway binary | `/usr/bin/openshell-gateway` | +| CLI binary | `/usr/bin/openshell` | +| Systemd user unit | `/usr/lib/systemd/user/openshell-gateway.service` | +| PKI bootstrap script | `/usr/libexec/openshell/init-pki.sh` | +| Env generator script | `/usr/libexec/openshell/init-gateway-env.sh` | +| TLS certificates | `~/.local/state/openshell/tls/` | +| CLI client certs | `~/.config/openshell/gateways/openshell/mtls/` | +| Gateway database | `~/.local/state/openshell/gateway.db` | +| Gateway configuration | `~/.config/openshell/gateway.env` | diff --git a/deploy/rpm/QUICKSTART.md b/deploy/rpm/QUICKSTART.md new file mode 100644 index 000000000..3be0c6fb6 --- /dev/null +++ b/deploy/rpm/QUICKSTART.md @@ -0,0 +1,158 @@ +# OpenShell RPM Quick Start + +Get from `dnf install` to a running sandbox in five minutes. + +## Prerequisites + +### Podman (rootless) + +The gateway uses rootless Podman for sandbox containers. Verify +Podman is installed and the cgroup version is v2: + +```shell +podman --version +podman info --format '{{.Host.CgroupsVersion}}' +``` + +The cgroup version must be `v2`. If it reports `v1`, enable the +unified cgroup hierarchy and reboot: + +```shell +sudo grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=1" +sudo reboot +``` + +### Subordinate UID/GID ranges + +Rootless containers require subordinate UID/GID mappings: + +```shell +grep $USER /etc/subuid /etc/subgid +``` + +If empty, add entries: + +```shell +sudo usermod --add-subuids 100000-165535 --add-subgids 100000-165535 $USER +``` + +### Podman socket + +The gateway communicates with Podman over its API socket. Enable +socket activation: + +```shell +systemctl --user enable --now podman.socket +``` + +### Network access + +The gateway pulls container images from ghcr.io on first sandbox +creation. 
Ensure the host can reach ghcr.io over HTTPS (port 443). + +For air-gapped environments, pre-load images with `podman load` and +set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never` in +`~/.config/openshell/gateway.env`. See CONFIGURATION.md for details. + +## Start the gateway + +```shell +systemctl --user enable --now openshell-gateway +``` + +On first start, the gateway automatically generates: + +- A self-signed PKI bundle (CA, server cert, client cert) for mTLS +- An SSH handshake secret for sandbox authentication +- A commented configuration file at `~/.config/openshell/gateway.env` + +Verify the service is running: + +```shell +systemctl --user status openshell-gateway +``` + +## Register the gateway with the CLI + +The CLI needs to know where the gateway is. Register it: + +```shell +openshell gateway add --local https://127.0.0.1:8080 +``` + +This discovers the pre-provisioned mTLS certificates at +`~/.config/openshell/gateways/openshell/mtls/` and sets the gateway +as active. + +Verify the connection: + +```shell +openshell status +``` + +## Persist across reboots + +By default, user services stop when you log out. To keep the gateway +running after logout and across reboots: + +```shell +sudo loginctl enable-linger $USER +``` + +Without this, the gateway and all running sandboxes are killed when +your login session ends. This is required for any headless or +production use. + +## Create your first sandbox + +Set your API key in the environment, then create a sandbox: + +```shell +export ANTHROPIC_API_KEY=sk-... +openshell sandbox create -- claude +``` + +The CLI detects the agent, prompts to create a credential provider +from your local environment, pulls the sandbox image from ghcr.io, +and connects you to the running sandbox.
+ +Other agents: + +```shell +openshell sandbox create -- opencode +openshell sandbox create -- codex +``` + +## Set up providers manually + +If you prefer to configure providers before creating sandboxes: + +```shell +# Create a provider from a local environment variable +openshell provider create --name anthropic --type anthropic --from-existing + +# Or supply the credential directly +openshell provider create --name openai --type openai \ + --credential OPENAI_API_KEY=sk-... + +# List configured providers +openshell provider list +``` + +## Configure inference routing (optional) + +To route inference requests through a specific provider and model: + +```shell +openshell inference set --provider openai --model gpt-4 +openshell inference get +``` + +## Next steps + +- See CONFIGURATION.md for TLS settings, environment variables, and + file locations. +- See TROUBLESHOOTING.md for CLI compatibility notes, remote access, + and common issues. +- Run `man openshell` for the CLI reference. +- Run `man openshell-gateway` for the gateway daemon reference. diff --git a/deploy/rpm/TROUBLESHOOTING.md b/deploy/rpm/TROUBLESHOOTING.md new file mode 100644 index 000000000..78ef4c475 --- /dev/null +++ b/deploy/rpm/TROUBLESHOOTING.md @@ -0,0 +1,230 @@ +# OpenShell RPM Troubleshooting + +Troubleshooting guide, CLI compatibility notes, remote access setup, +and upgrade procedures for the RPM deployment. + +## CLI compatibility + +The RPM installs the gateway as a systemd user service with the Podman +compute driver. The published online docs and some CLI commands assume +a Docker/K3s deployment model. This section clarifies which commands +work, which do not, and what to use instead. 
+ +### Commands that work normally + +All sandbox, provider, policy, inference, and settings commands +communicate with the gateway over gRPC and work identically regardless +of deployment mode: + +``` +openshell status +openshell sandbox create|list|get|delete|connect|exec +openshell logs +openshell provider create|list|get|update|delete +openshell policy get|set|update|list|prove +openshell inference set|get|update +openshell settings get|set +openshell forward start|stop|list +openshell term +openshell gateway add|select|info +openshell gateway destroy (removes CLI registration only) +``` + +### Commands that do not apply + +These commands manage Docker container lifecycle and are not applicable +to the RPM/systemd deployment. Use the systemd equivalents instead. + +| CLI command | RPM alternative | +|-------------|-----------------| +| `openshell gateway start` | `systemctl --user start openshell-gateway` | +| `openshell gateway stop` | `systemctl --user stop openshell-gateway` | +| `openshell doctor check` | `systemctl --user status openshell-gateway` | +| `openshell doctor logs` | `journalctl --user -u openshell-gateway` | +| `openshell doctor logs --tail` | `journalctl --user -u openshell-gateway -f` | +| `openshell doctor exec` | Not applicable (no K3s container) | + +### Building from local Dockerfiles + +`openshell sandbox create --from ./Dockerfile` builds via Docker and +pushes into K3s containerd. With the Podman driver, build the image +with Podman and reference it directly: + +```shell +podman build -t my-sandbox ./my-dir +openshell sandbox create --from localhost/my-sandbox +``` + +## Remote CLI access + +The auto-generated server certificate only includes SANs for +`localhost`, `127.0.0.1`, and Podman-internal names. To connect from a +different machine, choose one of the following approaches. 
+ +### Option 1: SSH tunnel (simplest) + +Forward the gateway port over SSH and connect via localhost: + +```shell +# On the remote CLI machine: +ssh -L 8080:127.0.0.1:8080 user@gateway-host + +# In another terminal on the same machine, copy the client certs first: +mkdir -p ~/.config/openshell/gateways/openshell +scp -r user@gateway-host:~/.config/openshell/gateways/openshell/mtls \ + ~/.config/openshell/gateways/openshell/ + +openshell gateway add --local https://127.0.0.1:8080 +openshell status +``` + +### Option 2: Externally-managed certificates + +Generate certificates that include the server's hostname or IP in the +SANs. See "Using externally-managed certificates" in CONFIGURATION.md. + +After placing the server and client certs, register from the remote +CLI: + +```shell +# Copy client certs to the remote CLI machine +mkdir -p ~/.config/openshell/gateways/openshell/mtls/ +cp ca.crt tls.crt tls.key ~/.config/openshell/gateways/openshell/mtls/ + +openshell gateway add --local https://<gateway-host>:8080 +``` + +### Firewall + +For remote access, open the gateway port in firewalld: + +```shell +sudo firewall-cmd --add-port=8080/tcp --permanent +sudo firewall-cmd --reload +``` + +For localhost-only access (the default use case), no firewall changes +are needed. Loopback traffic is not filtered by firewalld. + +mTLS prevents unauthenticated access even when the port is open to the +network. + +## Common issues + +### "No active gateway" + +The CLI cannot find a registered gateway. This happens when the +gateway is running but has not been registered with the CLI. + +```shell +openshell gateway add --local https://127.0.0.1:8080 +``` + +### Gateway fails to start + +Check the journal for error details: + +```shell +journalctl --user -u openshell-gateway --no-pager -n 50 +``` + +Common causes: + +**cgroups v1 detected.** The Podman driver requires cgroups v2. +Check the version: + +```shell +stat -fc %T /sys/fs/cgroup +``` + +Expected output: `cgroup2fs`.
If it shows `tmpfs`, enable cgroups v2: + +```shell +sudo grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=1" +sudo reboot +``` + +**Podman socket not available.** Ensure socket activation is enabled: + +```shell +systemctl --user enable --now podman.socket +systemctl --user status podman.socket +``` + +**TLS certificate errors.** If certs are corrupted, regenerate them: + +```shell +rm -rf ~/.local/state/openshell/tls +systemctl --user restart openshell-gateway +``` + +### Sandbox creation fails + +**subuid/subgid missing.** Rootless Podman requires subordinate +UID/GID ranges. If the journal shows warnings about `/etc/subuid` or +container creation fails: + +```shell +grep $USER /etc/subuid /etc/subgid +# If empty: +sudo usermod --add-subuids 100000-165535 --add-subgids 100000-165535 $USER +``` + +**Image pull failure.** Verify ghcr.io is reachable: + +```shell +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +``` + +### Images not updating + +The default image pull policy is `missing` -- images are pulled once +and cached. To update: + +```shell +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +podman pull ghcr.io/nvidia/openshell/supervisor:latest +``` + +Or set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=always` in +`~/.config/openshell/gateway.env` and restart the gateway. + +### Gateway stops on logout + +Enable lingering so the service survives logout: + +```shell +sudo loginctl enable-linger $USER +``` + +## SELinux + +No SELinux configuration is required on stock Fedora or RHEL. The +Podman driver automatically applies the `:z` relabel option to TLS +bind mounts when SELinux is detected, allowing sandbox containers to +read the certificates through the MAC policy. + +## Upgrading + +After upgrading the RPM packages: + +```shell +sudo dnf update openshell openshell-gateway +systemctl --user restart openshell-gateway +``` + +The SQLite database schema is auto-migrated on startup. 
Running +sandboxes are stopped during the restart. + +The `gateway.env` file is not overwritten during upgrades. The +`init-gateway-env.sh` script is idempotent and only generates the file +on first start. New configuration options from newer versions can be +added manually by referencing CONFIGURATION.md or running +`openshell-gateway --help`. + +To pick up new container images after an upgrade: + +```shell +podman pull ghcr.io/nvidia/openshell/supervisor:latest +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +```
diff --git a/deploy/rpm/init-gateway-env.sh b/deploy/rpm/init-gateway-env.sh
new file mode 100644
index 000000000..7f7287c48
--- /dev/null
+++ b/deploy/rpm/init-gateway-env.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Generate the gateway environment configuration file on first start.
+#
+# Called from the systemd ExecStartPre directive to bootstrap the
+# gateway configuration. Idempotent: exits immediately if the file
+# already exists.
+#
+# Usage:
+#   init-gateway-env.sh <env-file>
+#
+# The generated file contains an auto-generated SSH handshake secret
+# and commented defaults for all gateway environment variables.
+
+set -euo pipefail
+
+ENV_FILE="${1:?Usage: init-gateway-env.sh <env-file>}"
+
+# ── Idempotent: skip if env file already exists ─────────────────────
+if [ -f "${ENV_FILE}" ]; then
+    exit 0
+fi
+
+# ── Create parent directory ─────────────────────────────────────────
+mkdir -p "$(dirname "${ENV_FILE}")"
+
+# ── Generate SSH handshake secret ───────────────────────────────────
+# 32 random bytes from /dev/urandom, hex-encoded (64 characters).
+SECRET=$(od -An -tx1 -N32 /dev/urandom | tr -dc 0-9a-f)
+
+# ── Write environment file ──────────────────────────────────────────
+# Write to a temporary file in the same directory and rename it into
+# place: mktemp creates it with mode 0600, so the secret is never
+# readable by other users, and a failed write cannot leave a partial
+# file behind that the idempotency check would accept on the next start.
+TMP_FILE=$(mktemp "${ENV_FILE}.XXXXXX")
+trap 'rm -f "${TMP_FILE}"' EXIT
+cat > "${TMP_FILE}" << EOF
+# OpenShell Gateway Environment Configuration
+# Generated on first start. Edit freely; this file is not overwritten.
+#
+# Values are read literally by systemd (no shell variable expansion);
+# replace \$XDG_* placeholders with absolute paths when uncommenting.
+#
+# Run 'openshell-gateway --help' for the full list of options.
+# See /usr/share/doc/openshell-gateway/ for guides.
+
+# ---- Required ----
+
+# Shared secret for gateway-to-sandbox SSH handshake authentication.
+# Auto-generated on first start. To regenerate:
+#   openssl rand -hex 32
+OPENSHELL_SSH_HANDSHAKE_SECRET=${SECRET}
+
+# ---- Optional (uncomment to override defaults) ----
+
+# Database URL for gateway state persistence.
+# Default for the user unit: sqlite://\$XDG_STATE_HOME/openshell/gateway.db
+#OPENSHELL_DB_URL=sqlite:///path/to/gateway.db
+
+# Compute driver: podman (default for RPM), docker, kubernetes.
+#OPENSHELL_DRIVERS=podman
+
+# Bind address. 0.0.0.0 listens on all interfaces; mTLS prevents
+# unauthenticated access.
+#OPENSHELL_BIND_HOST=0.0.0.0
+
+# API port (default: 8080).
+#OPENSHELL_SERVER_PORT=8080
+
+# Log level: trace, debug, info, warn, error.
+#OPENSHELL_LOG_LEVEL=info
+
+# ---- Images ----
+
+# Supervisor binary OCI image (mounted read-only into sandboxes).
+#OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest
+
+# Default sandbox base image.
+#OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest
+
+# Image pull policy: always, missing (default), never, newer.
+# Use 'always' to pick up new tags automatically.
+# Use 'never' for air-gapped environments with pre-loaded images.
+#OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=missing
+
+# ---- TLS (mTLS enabled by default) ----
+# PKI is auto-generated by init-pki.sh on first start. Client certs are
+# placed in ~/.config/openshell/gateways/openshell/mtls/ so the CLI
+# discovers them automatically.
+#
+# To use externally-managed certs, uncomment and edit the paths below.
+# To rotate certs, delete ~/.local/state/openshell/tls/ and restart.
+# To disable TLS (NOT RECOMMENDED), uncomment the next line and
+# comment out all OPENSHELL_TLS_* and OPENSHELL_PODMAN_TLS_* variables.
+#OPENSHELL_DISABLE_TLS=true
+
+# Server TLS (gateway listens with these certs).
+#OPENSHELL_TLS_CERT=\$XDG_STATE_HOME/openshell/tls/server/tls.crt
+#OPENSHELL_TLS_KEY=\$XDG_STATE_HOME/openshell/tls/server/tls.key
+#OPENSHELL_TLS_CLIENT_CA=\$XDG_STATE_HOME/openshell/tls/ca.crt
+
+# Podman driver: client certs bind-mounted into sandbox containers.
+#OPENSHELL_PODMAN_TLS_CA=\$XDG_STATE_HOME/openshell/tls/ca.crt
+#OPENSHELL_PODMAN_TLS_CERT=\$XDG_STATE_HOME/openshell/tls/client/tls.crt
+#OPENSHELL_PODMAN_TLS_KEY=\$XDG_STATE_HOME/openshell/tls/client/tls.key
+
+# ---- Podman driver ----
+
+# Podman API Unix socket path.
+#OPENSHELL_PODMAN_SOCKET=\$XDG_RUNTIME_DIR/podman/podman.sock
+
+# Podman bridge network name for sandbox containers.
+#OPENSHELL_NETWORK_NAME=openshell
+
+# Container stop timeout in seconds (SIGTERM then SIGKILL).
+#OPENSHELL_STOP_TIMEOUT=10
+EOF
+
+chmod 600 "${TMP_FILE}"
+mv -f "${TMP_FILE}" "${ENV_FILE}"
+echo "Gateway environment generated: ${ENV_FILE}"
diff --git a/deploy/rpm/init-pki.sh b/deploy/rpm/init-pki.sh new file mode 100755 index 000000000..e26102d1a --- /dev/null +++ b/deploy/rpm/init-pki.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Generate a self-signed PKI bundle for the OpenShell gateway. +# +# Called from the systemd ExecStartPre directive to bootstrap mTLS on +# first start. Idempotent: exits immediately if certs already exist.
+# +# Usage: +# init-pki.sh +# +# Output layout: +# /ca.crt CA certificate +# /ca.key CA private key (mode 0600) +# /server/tls.crt Server certificate +# /server/tls.key Server private key (mode 0600) +# /client/tls.crt Client certificate +# /client/tls.key Client private key (mode 0600) +# +# Client certs are also copied to the CLI's auto-discovery directory: +# $XDG_CONFIG_HOME/openshell/gateways/openshell/mtls/{ca.crt,tls.crt,tls.key} + +set -euo pipefail + +PKI_DIR="${1:?Usage: init-pki.sh }" + +# ── Idempotent: skip if CA already exists ──────────────────────────── +if [ -f "${PKI_DIR}/ca.crt" ]; then + exit 0 +fi + +# ── Resolve CLI cert directory ─────────────────────────────────────── +CLI_MTLS_DIR="${XDG_CONFIG_HOME:-${HOME}/.config}/openshell/gateways/openshell/mtls" + +# ── Create directories ─────────────────────────────────────────────── +mkdir -p "${PKI_DIR}/server" "${PKI_DIR}/client" "${CLI_MTLS_DIR}" + +# ── Temporary workspace (cleaned up on exit) ───────────────────────── +TMPDIR=$(mktemp -d) +trap 'rm -rf "${TMPDIR}"' EXIT + +# ── Server certificate SANs ───────────────────────────────────────── +# These must match what the supervisor connects to. The CLI also +# connects using localhost/127.0.0.1 by default. 
+cat > "${TMPDIR}/server-san.cnf" <<'EOF' +[req] +distinguished_name = req_dn +req_extensions = v3_req +prompt = no + +[req_dn] +O = openshell +CN = openshell-server + +[v3_req] +subjectAltName = @alt_names + +[alt_names] +DNS.1 = localhost +DNS.2 = openshell +DNS.3 = openshell.openshell.svc +DNS.4 = openshell.openshell.svc.cluster.local +DNS.5 = host.containers.internal +DNS.6 = host.docker.internal +IP.1 = 127.0.0.1 +EOF + +# ── Generate CA ────────────────────────────────────────────────────── +openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "${PKI_DIR}/ca.key" \ + -out "${PKI_DIR}/ca.crt" \ + -days 3650 -nodes \ + -subj "/O=openshell/CN=openshell-ca" \ + 2>/dev/null +chmod 600 "${PKI_DIR}/ca.key" + +# ── Generate server certificate ────────────────────────────────────── +openssl req -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "${PKI_DIR}/server/tls.key" \ + -out "${TMPDIR}/server.csr" \ + -nodes \ + -config "${TMPDIR}/server-san.cnf" \ + 2>/dev/null + +openssl x509 -req \ + -in "${TMPDIR}/server.csr" \ + -CA "${PKI_DIR}/ca.crt" -CAkey "${PKI_DIR}/ca.key" -CAcreateserial \ + -out "${PKI_DIR}/server/tls.crt" \ + -days 3650 \ + -extensions v3_req \ + -extfile "${TMPDIR}/server-san.cnf" \ + 2>/dev/null +chmod 600 "${PKI_DIR}/server/tls.key" + +# ── Generate client certificate ────────────────────────────────────── +openssl req -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "${PKI_DIR}/client/tls.key" \ + -out "${TMPDIR}/client.csr" \ + -nodes \ + -subj "/O=openshell/CN=openshell-client" \ + 2>/dev/null + +openssl x509 -req \ + -in "${TMPDIR}/client.csr" \ + -CA "${PKI_DIR}/ca.crt" -CAkey "${PKI_DIR}/ca.key" -CAcreateserial \ + -out "${PKI_DIR}/client/tls.crt" \ + -days 3650 \ + 2>/dev/null +chmod 600 "${PKI_DIR}/client/tls.key" + +# ── Copy client certs to CLI auto-discovery directory ──────────────── +# The CLI automatically looks for certs at: +# 
$XDG_CONFIG_HOME/openshell/gateways//mtls/{ca.crt,tls.crt,tls.key} +# For localhost gateways, defaults to "openshell". +cp "${PKI_DIR}/ca.crt" "${CLI_MTLS_DIR}/ca.crt" +cp "${PKI_DIR}/client/tls.crt" "${CLI_MTLS_DIR}/tls.crt" +cp "${PKI_DIR}/client/tls.key" "${CLI_MTLS_DIR}/tls.key" +chmod 600 "${CLI_MTLS_DIR}/tls.key" + +echo "PKI bootstrap complete: ${PKI_DIR}" diff --git a/e2e/rust/e2e-podman.sh b/e2e/rust/e2e-podman.sh index 38c6e6b7c..e2681e008 100755 --- a/e2e/rust/e2e-podman.sh +++ b/e2e/rust/e2e-podman.sh @@ -20,7 +20,7 @@ # # Prerequisites: # - Rootless Podman service running (systemctl --user start podman.socket) -# - Supervisor sideload image built (mise run build:docker:supervisor-sideload) +# - Supervisor image built (mise run build:docker:supervisor) # - Sandbox base image available locally set -euo pipefail @@ -68,7 +68,7 @@ SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-}" # Verify the supervisor image exists locally. if ! podman image exists "${SUPERVISOR_IMAGE}" 2>/dev/null; then echo "ERROR: supervisor image '${SUPERVISOR_IMAGE}' not found locally." - echo "Build it with: mise run build:docker:supervisor-sideload" + echo "Build it with: mise run build:docker:supervisor" exit 1 fi diff --git a/openshell.spec b/openshell.spec new file mode 100644 index 000000000..ab93c5fa3 --- /dev/null +++ b/openshell.spec @@ -0,0 +1,275 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +%global crate openshell + +# Cargo/Rust builds with vendored deps do not produce debugsource listings +# in the format redhat-rpm-config expects (especially on EPEL). 
+%global debug_package %{nil}
+
+Name:           openshell
+Version:        0.0.37
+Release:        1.20260501160805739969.rpm.73.ge7c4151d%{?dist}
+Summary:        Safe, sandboxed runtimes for autonomous AI agents
+
+License:        Apache-2.0
+URL:            https://github.com/NVIDIA/OpenShell
+Source0:        %{name}-%{version}.tar.gz
+Source1:        %{name}-%{version}-vendor.tar.xz
+
+ExclusiveArch:  x86_64 aarch64
+
+# Rust build dependencies
+# NOTE: MSRV is 1.88 (Rust edition 2024). As of mid-2025, this requires
+# Fedora Rawhide or newer. Stable Fedora and EPEL-10 may ship older Rust;
+# adjust targets in .packit.yaml accordingly or provide a supplementary
+# Rust toolchain via additional_repos in the COPR build config.
+BuildRequires:  rust >= 1.88
+BuildRequires:  cargo
+BuildRequires:  cargo-rpm-macros >= 25
+BuildRequires:  gcc
+BuildRequires:  gcc-c++
+BuildRequires:  make
+BuildRequires:  cmake
+BuildRequires:  pkg-config
+BuildRequires:  clang-devel
+BuildRequires:  z3-devel
+BuildRequires:  systemd-rpm-macros
+
+# Man page generation
+BuildRequires:  pandoc
+
+# Python sub-package build dependencies
+BuildRequires:  python3-devel
+
+# Runtime: container runtime for gateway lifecycle (start/stop/destroy).
+# Podman is preferred; Docker is also supported via --container-runtime flag.
+Recommends:     podman
+
+%description
+OpenShell provides safe, sandboxed runtimes for autonomous AI agents.
+It offers a CLI for managing gateways, sandboxes, and providers with
+policy-enforced egress routing, credential proxying, and privacy-aware
+LLM inference routing.
+
+# --- Gateway sub-package (arch-specific dependency on the matching CLI build) ---
+%package gateway
+Summary:        OpenShell gateway server with Podman sandbox driver
+Requires:       podman
+Requires:       openssl
+Requires:       %{name}%{?_isa} = %{version}-%{release}
+
+%description gateway
+OpenShell gateway server providing the control-plane API for sandbox
+lifecycle management. This package configures the gateway to use the
+Podman compute driver, pulling sandbox and supervisor images from
+ghcr.io/nvidia/openshell.
+
+# --- Python SDK sub-package ---
+%package -n python3-%{name}
+Summary:        OpenShell Python SDK for agent execution and management
+# Use Recommends instead of Requires because Fedora 43+ ships older
+# versions of grpcio (1.48) and protobuf (3.19) than the SDK needs.
+# Users on distros with older packages can install these via pip/uv.
+Recommends:     python3-cloudpickle >= 3.0
+Recommends:     python3-grpcio >= 1.60
+Recommends:     python3-protobuf >= 4.25
+Recommends:     %{name}
+
+%description -n python3-%{name}
+Python SDK for OpenShell providing programmatic access to sandbox
+management, agent execution, and inference routing via gRPC.
+
+%prep
+%autosetup -n %{name}-%{version}
+
+# Extract vendored Cargo dependencies and configure offline build
+tar xf %{SOURCE1}
+%cargo_prep -v vendor
+
+# Patch the workspace version placeholder, then verify the same line-anchored key (a dependency's version must not satisfy the check)
+sed -i 's/^version = "0\.0\.0"/version = "%{version}"/' Cargo.toml
+grep -q '^version = "%{version}"' Cargo.toml || { echo "ERROR: Cargo.toml version patch failed" >&2; exit 1; }
+
+%build
+# Build the CLI and gateway binaries
+export CARGO_BUILD_JOBS=%{_smp_build_ncpus}
+# Set the default container image tag so compiled-in image refs point at
+# real tags in the ghcr.io/nvidia/openshell registry.
+export OPENSHELL_IMAGE_TAG=latest
+cargo build --release --bin openshell --bin openshell-gateway
+
+# Generate vendored crate manifest and license metadata.
+# cargo-vendor.txt is consumed by an RPM generator (from cargo-rpm-macros)
+# to emit Provides: bundled(crate(...)) = version for every vendored dep.
+%cargo_vendor_manifest
+%{cargo_license_summary}
+%{cargo_license} > LICENSE.dependencies
+
+# Build man pages from markdown
+pandoc -s -t man deploy/man/openshell.1.md -o openshell.1
+pandoc -s -t man deploy/man/openshell-gateway.8.md -o openshell-gateway.8
+pandoc -s -t man deploy/man/openshell-gateway.env.5.md -o openshell-gateway.env.5
+
+%install
+# --- CLI binary ---
+install -Dpm 0755 target/release/%{name} %{buildroot}%{_bindir}/%{name}
+
+# --- Gateway binary ---
+install -Dpm 0755 target/release/%{name}-gateway %{buildroot}%{_bindir}/%{name}-gateway
+
+# --- Gateway systemd user unit (rootless Podman) ---
+# Installed to the systemd user unit directory so any user can run:
+#   systemctl --user enable --now openshell-gateway.service
+# Podman socket activation provides the container API.
+install -d %{buildroot}%{_userunitdir}
+cat > %{buildroot}%{_userunitdir}/%{name}-gateway.service << 'EOF'
+[Unit]
+Description=OpenShell Gateway (user)
+Documentation=https://github.com/NVIDIA/OpenShell
+After=podman.socket
+Requires=podman.socket
+
+[Service]
+Type=exec
+# Self-contained defaults for rootless operation with mTLS.
+#
+# PKI and gateway.env are auto-generated on first start. Client certs
+# are placed in ~/.config/openshell/gateways/openshell/mtls/ so the
+# CLI discovers them automatically.
+# See /usr/share/doc/openshell-gateway/ for details.
+
+# Auto-generate PKI on first start if not present.
+# %%S expands to $XDG_STATE_HOME (~/.local/state) in user units.
+ExecStartPre=%{_libexecdir}/openshell/init-pki.sh %%S/openshell/tls
+
+# Auto-generate gateway.env (SSH handshake secret + commented config
+# reference) on first start if not present.
+# %%E expands to $XDG_CONFIG_HOME (~/.config) in user units.
+ExecStartPre=%{_libexecdir}/openshell/init-gateway-env.sh %%E/openshell/gateway.env
+EnvironmentFile=-%%E/openshell/gateway.env
+Environment=OPENSHELL_BIND_HOST=0.0.0.0
+Environment=OPENSHELL_DRIVERS=podman
+Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db
+Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest
+Environment=OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest
+# mTLS: auto-generated certs in the state directory.
+Environment=OPENSHELL_TLS_CERT=%%S/openshell/tls/server/tls.crt
+Environment=OPENSHELL_TLS_KEY=%%S/openshell/tls/server/tls.key
+Environment=OPENSHELL_TLS_CLIENT_CA=%%S/openshell/tls/ca.crt
+# Podman driver: client certs bind-mounted into sandbox containers.
+Environment=OPENSHELL_PODMAN_TLS_CA=%%S/openshell/tls/ca.crt
+Environment=OPENSHELL_PODMAN_TLS_CERT=%%S/openshell/tls/client/tls.crt
+Environment=OPENSHELL_PODMAN_TLS_KEY=%%S/openshell/tls/client/tls.key
+ExecStart=%{_bindir}/%{name}-gateway
+StateDirectory=openshell
+Restart=on-failure
+RestartSec=5
+
+# Security hardening. NOTE(review): where the user manager enforces ProtectSystem=strict, paths outside StateDirectory are read-only; confirm the ExecStartPre helpers can still write under the config directory.
+NoNewPrivileges=yes
+ProtectSystem=strict
+PrivateTmp=yes
+RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
+
+[Install]
+WantedBy=default.target
+EOF
+
+# --- PKI bootstrap script and gateway env generator ---
+install -d %{buildroot}%{_libexecdir}/%{name}
+install -pm 0755 deploy/rpm/init-pki.sh %{buildroot}%{_libexecdir}/%{name}/init-pki.sh
+install -pm 0755 deploy/rpm/init-gateway-env.sh %{buildroot}%{_libexecdir}/%{name}/init-gateway-env.sh
+
+# --- Gateway documentation ---
+install -d %{buildroot}%{_docdir}/%{name}-gateway
+install -pm 0644 deploy/rpm/QUICKSTART.md %{buildroot}%{_docdir}/%{name}-gateway/QUICKSTART.md
+install -pm 0644 deploy/rpm/CONFIGURATION.md %{buildroot}%{_docdir}/%{name}-gateway/CONFIGURATION.md
+install -pm 0644 deploy/rpm/TROUBLESHOOTING.md %{buildroot}%{_docdir}/%{name}-gateway/TROUBLESHOOTING.md
+
+# --- Man pages ---
+install -Dpm 0644 openshell.1 %{buildroot}%{_mandir}/man1/openshell.1
+install -Dpm 0644 openshell-gateway.8 %{buildroot}%{_mandir}/man8/openshell-gateway.8
+install -Dpm 0644 openshell-gateway.env.5 %{buildroot}%{_mandir}/man5/openshell-gateway.env.5
+
+# --- Python SDK ---
+# Install Python SDK modules (test files are intentionally excluded)
+install -d %{buildroot}%{python3_sitelib}/%{name}
+install -d %{buildroot}%{python3_sitelib}/%{name}/_proto
+
+install -pm 0644 python/%{name}/__init__.py %{buildroot}%{python3_sitelib}/%{name}/
+install -pm 0644 python/%{name}/sandbox.py %{buildroot}%{python3_sitelib}/%{name}/
+install -pm 0644 python/%{name}/_proto/__init__.py %{buildroot}%{python3_sitelib}/%{name}/_proto/
+install -pm 0644 python/%{name}/_proto/*.py %{buildroot}%{python3_sitelib}/%{name}/_proto/
+
+# Create dist-info so importlib.metadata can resolve the package version (taken from the Version: tag, which %%check asserts)
+install -d %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info
+cat > %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/METADATA << EOF
+Metadata-Version: 2.1
+Name: %{name}
+Version: %{version}
+Summary: OpenShell Python SDK for agent execution and management
+License: Apache-2.0
+Requires-Python: >=3.12
+Requires-Dist: cloudpickle>=3.0
+Requires-Dist: grpcio>=1.60
+Requires-Dist: protobuf>=4.25
+EOF
+
+# INSTALLER marker per PEP 376
+echo "rpm" > %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/INSTALLER
+
+# RECORD can be empty for RPM-managed installs
+touch %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/RECORD
+
+%check
+# Smoke-test the CLI binary
+%{buildroot}%{_bindir}/%{name} --version
+
+# Smoke-test the gateway binary
+%{buildroot}%{_bindir}/%{name}-gateway --version
+
+# Smoke-test the Python SDK version metadata via importlib.metadata.
+# We query the dist-info directly rather than importing the package because
+# the full import pulls in grpcio and other runtime deps not present in the
+# build environment.
+PYTHONPATH=%{buildroot}%{python3_sitelib} %{python3} -c "from importlib.metadata import version; v = version('openshell'); print(v); assert v == '%{version}', f'expected %{version}, got {v}'"
+
+%post gateway
+%systemd_user_post %{name}-gateway.service
+
+%preun gateway
+%systemd_user_preun %{name}-gateway.service
+
+%postun gateway
+%systemd_user_postun_with_restart %{name}-gateway.service
+
+%files
+%license LICENSE
+%license LICENSE.dependencies
+%license cargo-vendor.txt
+%doc README.md
+%{_bindir}/%{name}
+%{_mandir}/man1/openshell.1*
+
+%files gateway
+%license LICENSE
+%license LICENSE.dependencies
+%license cargo-vendor.txt
+%dir %{_docdir}/%{name}-gateway
+%doc %{_docdir}/%{name}-gateway/QUICKSTART.md
+%doc %{_docdir}/%{name}-gateway/CONFIGURATION.md
+%doc %{_docdir}/%{name}-gateway/TROUBLESHOOTING.md
+%{_bindir}/%{name}-gateway
+%{_userunitdir}/%{name}-gateway.service
+%{_libexecdir}/%{name}/
+%{_mandir}/man8/openshell-gateway.8*
+%{_mandir}/man5/openshell-gateway.env.5*
+
+%files -n python3-%{name}
+%license LICENSE
+%{python3_sitelib}/%{name}/
+%{python3_sitelib}/%{name}-%{version}.dist-info/
+
+%changelog
+%autochangelog
diff --git a/tasks/docker.toml b/tasks/docker.toml
index b952d559c..78796d868 100644
--- a/tasks/docker.toml
+++ b/tasks/docker.toml
@@ -9,7 +9,6 @@ depends = [
   "build:docker:gateway",
   "build:docker:cluster",
   "build:docker:supervisor",
-  "build:docker:supervisor-sideload",
 ]
 hide = true
 
@@ -29,15 +28,10 @@ run = "tasks/scripts/docker-build-image.sh gateway"
 hide = true
 
 ["build:docker:supervisor"]
-description = "Build the standalone supervisor Docker image (Ubuntu-based, for K8s pods)"
+description = "Build the supervisor image (FROM scratch, binary only)"
 run = "tasks/scripts/docker-build-image.sh supervisor"
 hide = true
 
-["build:docker:supervisor-sideload"]
-description = "Build the supervisor sideload image (FROM scratch, for Podman image-volume mount)"
-run = "tasks/scripts/docker-build-image.sh supervisor-output"
-hide = true
-
 ["build:docker:cluster"]
 description = "Build the k3s cluster image (component images pulled at runtime from registry)"
 run = "tasks/scripts/docker-build-image.sh cluster"
@@ -53,11 +47,6 @@ description = "Alias for build:docker:supervisor"
 depends = ["build:docker:supervisor"]
 hide = true
 
-["docker:build:supervisor-sideload"]
-description = "Alias for build:docker:supervisor-sideload"
-depends = ["build:docker:supervisor-sideload"]
-hide = true
-
 ["docker:build:cluster"]
 description = "Alias for build:docker:cluster"
 depends = ["build:docker:cluster"]
diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh
index 537b2a2cc..997a631ad 100755
--- a/tasks/scripts/docker-build-image.sh
+++ b/tasks/scripts/docker-build-image.sh
@@ -43,7 +43,7 @@ required_prebuilt_binaries() {
     gateway)
       echo "openshell-gateway"
       ;;
-    supervisor|cluster|supervisor-output)
+    supervisor|cluster|supervisor-sideload|supervisor-output)
       echo "openshell-sandbox"
       ;;
   esac
@@ -90,7 +90,7 @@ ensure_prebuilt_binaries() {
   fi
 }
 
-TARGET=${1:?"Usage: docker-build-image.sh [extra-args...]"}
+TARGET=${1:?"Usage: docker-build-image.sh [extra-args...]"}
 shift
 
 DOCKERFILE="deploy/docker/Dockerfile.images"
@@ -118,10 +118,14 @@ case "${TARGET}" in
     IMAGE_NAME="openshell/cluster"
     DOCKER_TARGET="cluster"
     ;;
+  supervisor-builder)
+    DOCKER_TARGET="supervisor-builder"
+    ;;
   supervisor-output)
+    # Backward-compat alias: same as "supervisor".
     IS_FINAL_IMAGE=1
     IMAGE_NAME="openshell/supervisor"
-    DOCKER_TARGET="supervisor-output"
+    DOCKER_TARGET="supervisor"
     ;;
   *)
     echo "Error: unsupported target '${TARGET}'" >&2
diff --git a/tasks/scripts/gateway-docker.sh b/tasks/scripts/gateway-docker.sh
index a481692a3..269933c96 100644
--- a/tasks/scripts/gateway-docker.sh
+++ b/tasks/scripts/gateway-docker.sh
@@ -137,7 +137,7 @@ if [[ "${HOST_OS}" == "Linux" && "${HOST_ARCH}" == "${DAEMON_ARCH}" ]]; then
   cp "${ROOT}/target/${SUPERVISOR_TARGET}/debug/openshell-sandbox" "${SUPERVISOR_BIN}"
 else
   # Cross-compile through the prebuilt-binary staging helper, then use the
-  # supervisor-output stage to extract just the openshell-sandbox binary.
+  # supervisor stage to extract just the openshell-sandbox binary.
   #
   # This task is gated on a working Docker daemon above, so pin the
   # container-engine helper to docker — otherwise it auto-detects podman
diff --git a/tasks/test.toml b/tasks/test.toml
index 90ef47190..df4a1db20 100644
--- a/tasks/test.toml
+++ b/tasks/test.toml
@@ -50,7 +50,7 @@ run = "uv run pytest -o python_files='test_*.py' -m gpu -n ${E2E_PARALLEL:-1} e2
 
 ["e2e:podman"]
 description = "Start a Podman-backed gateway and run smoke e2e (requires rootless Podman; pass -- --port=N to override)"
-depends = ["build:docker:supervisor-sideload"]
+depends = ["build:docker:supervisor"]
 run = "e2e/rust/e2e-podman.sh"
 
 ["e2e:vm"]