From 465287d1cde7cb9e69c2b33de8dc53f01a39409b Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Mon, 27 Apr 2026 19:27:00 -0500 Subject: [PATCH 01/18] feat: add RPM packaging with packit and COPR integration Add .packit.yaml and openshell.spec for building RPMs via Fedora COPR. Produces three sub-packages: openshell (CLI), openshell-gateway (server with system and user systemd units), and python3-openshell (SDK). The user unit supports rootless Podman operation out of the box. --- .packit.yaml | 77 ++++++++++++++ openshell.spec | 265 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 342 insertions(+) create mode 100644 .packit.yaml create mode 100644 openshell.spec diff --git a/.packit.yaml b/.packit.yaml new file mode 100644 index 000000000..d9de04b2f --- /dev/null +++ b/.packit.yaml @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Packit configuration for OpenShell RPM builds via Fedora COPR. +# See https://packit.dev/docs/configuration for full reference. + +upstream_tag_template: "v{version}" +upstream_package_name: openshell +downstream_package_name: openshell +specfile_path: openshell.spec + +# Packages needed in the SRPM build environment to create vendor tarball +srpm_build_deps: + - rust + - cargo + - git-core + +actions: + get-current-version: + # Derive version from the latest upstream tag on the current branch. + - 'bash -c "git describe --tags --match ''v*'' --abbrev=0 HEAD | sed ''s/^v//''"' + + create-archive: + # Step 1: Create source tarball from git working tree. + # Uses git ls-files + tar instead of git archive so the tarball + # reflects any patching that Packit may have done (e.g. version bumps). 
+    - 'bash -c "set -o pipefail && VERSION=${PACKIT_PROJECT_VERSION} && TMPDIR=$(mktemp -d) && DIR=openshell-${VERSION} && mkdir -p ${TMPDIR}/${DIR} && git ls-files -z | tar --null -T - -cf - | tar xf - -C ${TMPDIR}/${DIR}/ && tar -czf openshell-${VERSION}.tar.gz -C ${TMPDIR} ${DIR} && rm -rf ${TMPDIR}"'  # one tar reads the whole NUL-delimited list; xargs could split it into several archives and only the first would be unpacked
+    # Step 2: Create vendored Cargo dependencies tarball for offline RPM build.
+    - 'bash -c "VERSION=${PACKIT_PROJECT_VERSION} && cargo vendor --quiet && tar -cJf openshell-${VERSION}-vendor.tar.xz vendor/ && rm -rf vendor/"'
+    # Step 3: Return BOTH archive names. Packit maps each line to Source0, Source1, etc.
+    - 'bash -c "echo openshell-${PACKIT_PROJECT_VERSION}.tar.gz && echo openshell-${PACKIT_PROJECT_VERSION}-vendor.tar.xz"'
+
+  fix-spec-file:
+    # Update Source0 to the generated tarball name
+    - 'bash -c "sed -i \"s|^Source0:.*|Source0: openshell-${PACKIT_PROJECT_VERSION}.tar.gz|\" openshell.spec"'
+    # Update Source1 to the generated vendor tarball name
+    - 'bash -c "sed -i \"s|^Source1:.*|Source1: openshell-${PACKIT_PROJECT_VERSION}-vendor.tar.xz|\" openshell.spec"'
+    # Update Version
+    - 'bash -c "sed -i -r \"s/^Version:(\\s*)\\S+/Version:\\1${PACKIT_RPMSPEC_VERSION}/\" openshell.spec"'
+    # Update Release
+    - 'bash -c "sed -i -r \"s/^Release:(\\s*)\\S+/Release:\\1${PACKIT_RPMSPEC_RELEASE}%{?dist}/\" openshell.spec"'
+
+jobs:
+  # Build on every pull request targeting main for CI validation
+  - job: copr_build
+    trigger: pull_request
+    branch: main
+    identifier: main-pr
+    targets:
+      - fedora-all
+      - epel-10
+
+  # Build into maxamillion/openshell on every commit to main
+  # for continuous development and testing builds.
+  - job: copr_build
+    trigger: commit
+    branch: main
+    owner: "maxamillion"
+    project: "openshell"
+    identifier: main-commit
+    targets:
+      - fedora-all
+      - epel-10
+    preserve_project: true
+    list_on_homepage: true
+
+  # Build on GitHub releases for publishable RPMs.
+ # See: https://packit.dev/docs/configuration/upstream/copr_build#using-a-custom-copr-project + - job: copr_build + trigger: release + owner: "maxamillion" + project: "openshell" + targets: + - fedora-all + - epel-10 + preserve_project: true + list_on_homepage: true diff --git a/openshell.spec b/openshell.spec new file mode 100644 index 000000000..d05b7da11 --- /dev/null +++ b/openshell.spec @@ -0,0 +1,265 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +%global crate openshell + +# Cargo/Rust builds with vendored deps do not produce debugsource listings +# in the format redhat-rpm-config expects (especially on EPEL). +%global debug_package %{nil} + +Name: openshell +Version: 0.0.37 +Release: 1.20260427180107195036.rpm.23.gcde20dc3%{?dist} +Summary: Safe, sandboxed runtimes for autonomous AI agents + +License: Apache-2.0 +URL: https://github.com/NVIDIA/OpenShell +Source0: openshell-0.0.37.tar.gz +Source1: openshell-0.0.37-vendor.tar.xz + +ExclusiveArch: x86_64 aarch64 + +# Rust build dependencies +# NOTE: MSRV is 1.88 (Rust edition 2024). As of mid-2025, this requires +# Fedora Rawhide or newer. Stable Fedora and EPEL-10 may ship older Rust; +# adjust targets in .packit.yaml accordingly or provide a supplementary +# Rust toolchain via additional_repos in the COPR build config. +BuildRequires: rust >= 1.88 +BuildRequires: cargo +BuildRequires: gcc +BuildRequires: gcc-c++ +BuildRequires: make +BuildRequires: cmake +BuildRequires: pkg-config +BuildRequires: clang-devel +BuildRequires: z3-devel +BuildRequires: systemd-rpm-macros + +# Python sub-package build dependencies +BuildRequires: python3-devel + +# Runtime: container runtime for gateway lifecycle (start/stop/destroy). +# Podman is preferred; Docker is also supported via --container-runtime flag. +Recommends: podman + +%description +OpenShell provides safe, sandboxed runtimes for autonomous AI agents. 
+It offers a CLI for managing gateways, sandboxes, and providers with +policy-enforced egress routing, credential proxying, and privacy-aware +LLM inference routing. + +# --- Gateway sub-package --- +%package gateway +Summary: OpenShell gateway server with Podman sandbox driver +Requires: podman +Requires: %{name} = %{version}-%{release} + +%description gateway +OpenShell gateway server providing the control-plane API for sandbox +lifecycle management. This package configures the gateway to use the +Podman compute driver, pulling sandbox and supervisor images from +ghcr.io/nvidia/openshell. + +# --- Python SDK sub-package --- +%package -n python3-%{name} +Summary: OpenShell Python SDK for agent execution and management +# Use Recommends instead of Requires because Fedora 43+ ships older +# versions of grpcio (1.48) and protobuf (3.19) than the SDK needs. +# Users on distros with older packages can install these via pip/uv. +Recommends: python3-cloudpickle >= 3.0 +Recommends: python3-grpcio >= 1.60 +Recommends: python3-protobuf >= 4.25 +Recommends: %{name} + +%description -n python3-%{name} +Python SDK for OpenShell providing programmatic access to sandbox +management, agent execution, and inference routing via gRPC. 
+ +%prep +%autosetup -n %{name}-%{version} + +# Extract vendored Cargo dependencies +tar xf %{SOURCE1} + +# Configure Cargo to use vendored dependencies for offline build +mkdir -p .cargo +cat > .cargo/config.toml << 'EOF' +[source.crates-io] +replace-with = "vendored-sources" + +[source.vendored-sources] +directory = "vendor" +EOF + +# Patch workspace version from placeholder to actual version +sed -i 's/^version = "0.0.0"/version = "%{version}"/' Cargo.toml +grep -q 'version = "%{version}"' Cargo.toml || (echo "ERROR: Cargo.toml version patch failed" && exit 1) + +%build +# Build the CLI and gateway binaries +export CARGO_BUILD_JOBS=%{_smp_build_ncpus} +# Set the default container image tag so compiled-in image refs point at +# real tags in the ghcr.io/nvidia/openshell registry. +export OPENSHELL_IMAGE_TAG=latest +cargo build --release --bin openshell --bin openshell-gateway + +%install +# --- CLI binary --- +install -Dpm 0755 target/release/%{name} %{buildroot}%{_bindir}/%{name} + +# --- Gateway binary --- +install -Dpm 0755 target/release/%{name}-gateway %{buildroot}%{_bindir}/%{name}-gateway + +# --- Gateway systemd unit --- +install -d %{buildroot}%{_unitdir} +cat > %{buildroot}%{_unitdir}/%{name}-gateway.service << 'EOF' +[Unit] +Description=OpenShell Gateway +Documentation=https://github.com/NVIDIA/OpenShell +After=network-online.target podman.socket +Wants=podman.socket + +[Service] +Type=exec +EnvironmentFile=/etc/sysconfig/openshell-gateway +ExecStart=/usr/bin/openshell-gateway +StateDirectory=openshell +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target +EOF + +# --- Gateway systemd user unit (rootless Podman) --- +# Installed to the systemd user unit directory so any user can run: +# systemctl --user start podman.socket +# systemctl --user enable --now openshell-gateway.service +install -d %{buildroot}%{_userunitdir} +cat > %{buildroot}%{_userunitdir}/%{name}-gateway.service << 'EOF' +[Unit] +Description=OpenShell Gateway (user) 
+Documentation=https://github.com/NVIDIA/OpenShell +After=podman.socket +Wants=podman.socket + +[Service] +Type=exec +# Read system defaults (optional) -- driver, images, TLS settings. +EnvironmentFile=-/etc/sysconfig/openshell-gateway +# Override the DB path for user-level state directory. +# %%S expands to $XDG_STATE_HOME (~/.local/state) in user units. +Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db +ExecStart=/usr/bin/openshell-gateway +StateDirectory=openshell +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=default.target +EOF + +# --- Gateway environment file --- +# Provides defaults for the Podman driver and GHCR image references. +# Admins can override these values by editing this file. +install -d %{buildroot}%{_sysconfdir}/sysconfig +cat > %{buildroot}%{_sysconfdir}/sysconfig/%{name}-gateway << 'EOF' +# OpenShell Gateway configuration +# See: openshell-gateway --help for all available options. + +# Compute driver: use Podman for sandbox container lifecycle. +OPENSHELL_DRIVERS=podman + +# Database URL for gateway state persistence. +OPENSHELL_DB_URL=sqlite:///var/lib/openshell/gateway.db + +# Supervisor image mounted into sandbox containers. +OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest + +# Default sandbox base image. +OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest + +# Disable TLS for local single-node operation. +# For production, comment this out and configure --tls-cert, --tls-key, +# and --tls-client-ca. 
+OPENSHELL_DISABLE_TLS=true +EOF + +# --- Gateway state directory --- +install -d %{buildroot}%{_sharedstatedir}/%{name} + +# --- Python SDK --- +# Install Python SDK modules (test files are intentionally excluded) +install -d %{buildroot}%{python3_sitelib}/%{name} +install -d %{buildroot}%{python3_sitelib}/%{name}/_proto + +install -pm 0644 python/%{name}/__init__.py %{buildroot}%{python3_sitelib}/%{name}/ +install -pm 0644 python/%{name}/sandbox.py %{buildroot}%{python3_sitelib}/%{name}/ +install -pm 0644 python/%{name}/_proto/__init__.py %{buildroot}%{python3_sitelib}/%{name}/_proto/ +install -pm 0644 python/%{name}/_proto/*.py %{buildroot}%{python3_sitelib}/%{name}/_proto/ + +# Create dist-info so importlib.metadata can resolve the package version +install -d %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info +cat > %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/METADATA << EOF +Metadata-Version: 2.1 +Name: %{name} +Version: 0.0.37 +Summary: OpenShell Python SDK for agent execution and management +License: Apache-2.0 +Requires-Python: >=3.12 +Requires-Dist: cloudpickle>=3.0 +Requires-Dist: grpcio>=1.60 +Requires-Dist: protobuf>=4.25 +EOF + +# INSTALLER marker per PEP 376 +echo "rpm" > %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/INSTALLER + +# RECORD can be empty for RPM-managed installs +touch %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/RECORD + +%check +# Smoke-test the CLI binary +%{buildroot}%{_bindir}/%{name} --version + +# Smoke-test the gateway binary +%{buildroot}%{_bindir}/%{name}-gateway --version + +# Smoke-test the Python SDK version metadata via importlib.metadata. +# We query the dist-info directly rather than importing the package because +# the full import pulls in grpcio and other runtime deps not present in the +# build environment. 
+PYTHONPATH=%{buildroot}%{python3_sitelib} %{python3} -c "from importlib.metadata import version; v = version('openshell'); print(v); assert v == '%{version}', f'expected %{version}, got {v}'" + +%post gateway +%systemd_post %{name}-gateway.service +%systemd_user_post %{name}-gateway.service + +%preun gateway +%systemd_preun %{name}-gateway.service +%systemd_user_preun %{name}-gateway.service + +%postun gateway +%systemd_postun_with_restart %{name}-gateway.service +%systemd_user_postun_with_restart %{name}-gateway.service + +%files +%license LICENSE +%doc README.md +%{_bindir}/%{name} + +%files gateway +%license LICENSE +%{_bindir}/%{name}-gateway +%{_unitdir}/%{name}-gateway.service +%{_userunitdir}/%{name}-gateway.service +%config(noreplace) %{_sysconfdir}/sysconfig/%{name}-gateway +%dir %{_sharedstatedir}/%{name} + +%files -n python3-%{name} +%license LICENSE +%{python3_sitelib}/%{name}/ +%{python3_sitelib}/%{name}-%{version}.dist-info/ + +%changelog +%autochangelog From 660410495b29c901f7bd39710d9397c758258fb0 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 28 Apr 2026 10:55:59 -0500 Subject: [PATCH 02/18] fix(rpm): harden gateway systemd units and sysconfig defaults - Restrict sysconfig to 0640 (contains SSH handshake secret) - Auto-generate SSH handshake secret in %post on fresh install - Add security warning about TLS-disabled exposure in sysconfig - Add systemd hardening (NoNewPrivileges, ProtectSystem, PrivateTmp, RestrictAddressFamilies) to both system and user units - Make user unit self-contained with inline Environment= defaults instead of reading the system sysconfig (which contains secrets) --- openshell.spec | 74 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/openshell.spec b/openshell.spec index d05b7da11..dc41ac7ed 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260427180107195036.rpm.23.gcde20dc3%{?dist} 
+Release: 1.20260428102251900262.rpm.24.gf5a444a0%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 @@ -127,6 +127,12 @@ StateDirectory=openshell Restart=on-failure RestartSec=5 +# Security hardening +NoNewPrivileges=yes +ProtectSystem=strict +PrivateTmp=yes +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX + [Install] WantedBy=multi-user.target EOF @@ -145,43 +151,82 @@ Wants=podman.socket [Service] Type=exec -# Read system defaults (optional) -- driver, images, TLS settings. -EnvironmentFile=-/etc/sysconfig/openshell-gateway -# Override the DB path for user-level state directory. -# %%S expands to $XDG_STATE_HOME (~/.local/state) in user units. +# Self-contained defaults for rootless operation. +# Set OPENSHELL_SSH_HANDSHAKE_SECRET before starting: +# systemctl --user edit openshell-gateway.service +# and add: +# [Service] +# Environment=OPENSHELL_SSH_HANDSHAKE_SECRET= +# +# WARNING: TLS is disabled. The gateway has NO authentication and +# listens on all interfaces. For network-exposed setups, configure +# mTLS certificates and remove OPENSHELL_DISABLE_TLS. +Environment=OPENSHELL_DRIVERS=podman Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db +Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest +Environment=OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest +Environment=OPENSHELL_DISABLE_TLS=true ExecStart=/usr/bin/openshell-gateway StateDirectory=openshell Restart=on-failure RestartSec=5 +# Security hardening +NoNewPrivileges=yes +ProtectSystem=strict +PrivateTmp=yes +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX + [Install] WantedBy=default.target EOF # --- Gateway environment file --- # Provides defaults for the Podman driver and GHCR image references. +# Mode 0640: contains the SSH handshake secret -- must not be world-readable. # Admins can override these values by editing this file. 
install -d %{buildroot}%{_sysconfdir}/sysconfig +install -pm 0640 /dev/null %{buildroot}%{_sysconfdir}/sysconfig/%{name}-gateway cat > %{buildroot}%{_sysconfdir}/sysconfig/%{name}-gateway << 'EOF' # OpenShell Gateway configuration # See: openshell-gateway --help for all available options. -# Compute driver: use Podman for sandbox container lifecycle. -OPENSHELL_DRIVERS=podman +# ---- Required settings ---- + +# Shared secret for gateway-to-sandbox SSH handshake authentication. +# REQUIRED: Generate a value before starting the service: +# openssl rand -hex 32 +# The same secret must be shared with every sandbox that connects to +# this gateway. +OPENSHELL_SSH_HANDSHAKE_SECRET= # Database URL for gateway state persistence. +# For the system unit this defaults to /var/lib/openshell/gateway.db. +# The user unit ignores this file and uses ~/.local/state/openshell/gateway.db. OPENSHELL_DB_URL=sqlite:///var/lib/openshell/gateway.db +# ---- Optional settings ---- + +# Compute driver: use Podman for sandbox container lifecycle. +OPENSHELL_DRIVERS=podman + # Supervisor image mounted into sandbox containers. OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest # Default sandbox base image. OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest -# Disable TLS for local single-node operation. -# For production, comment this out and configure --tls-cert, --tls-key, -# and --tls-client-ca. +# ---- SECURITY WARNING ---- +# TLS is disabled by default for ease of initial setup. With TLS +# disabled, the gateway has NO authentication and listens on ALL +# network interfaces (0.0.0.0:8080). Any host that can reach this +# port has full unauthenticated access to the API, including sandbox +# creation, command execution, and credential retrieval. +# +# For any deployment beyond single-user localhost testing: +# 1. Generate mTLS certificates (see OpenShell docs) +# 2. Set OPENSHELL_TLS_CERT, OPENSHELL_TLS_KEY, OPENSHELL_TLS_CLIENT_CA +# 3.
Comment out OPENSHELL_DISABLE_TLS below OPENSHELL_DISABLE_TLS=true EOF @@ -232,6 +277,13 @@ touch %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/RECORD PYTHONPATH=%{buildroot}%{python3_sitelib} %{python3} -c "from importlib.metadata import version; v = version('openshell'); print(v); assert v == '%{version}', f'expected %{version}, got {v}'" %post gateway +# Generate SSH handshake secret on fresh install if not already set. +# Uses /dev/urandom to avoid requiring openssl at install time. +SYSCONFIG=%{_sysconfdir}/sysconfig/%{name}-gateway +if [ -f "$SYSCONFIG" ] && grep -q '^OPENSHELL_SSH_HANDSHAKE_SECRET=$' "$SYSCONFIG" 2>/dev/null; then + SECRET=$(head -c 32 /dev/urandom | od -A n -t x1 | tr -d ' \n') + sed -i "s/^OPENSHELL_SSH_HANDSHAKE_SECRET=$/OPENSHELL_SSH_HANDSHAKE_SECRET=${SECRET}/" "$SYSCONFIG" +fi %systemd_post %{name}-gateway.service %systemd_user_post %{name}-gateway.service @@ -253,7 +305,7 @@ PYTHONPATH=%{buildroot}%{python3_sitelib} %{python3} -c "from importlib.metadata %{_bindir}/%{name}-gateway %{_unitdir}/%{name}-gateway.service %{_userunitdir}/%{name}-gateway.service -%config(noreplace) %{_sysconfdir}/sysconfig/%{name}-gateway +%attr(0640,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/%{name}-gateway %dir %{_sharedstatedir}/%{name} %files -n python3-%{name} From eb837bc64ea469982fab37f121088c0e49d68a88 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 28 Apr 2026 11:30:21 -0500 Subject: [PATCH 03/18] fix(rpm): auto-generate SSH handshake secret for user unit The user unit was missing OPENSHELL_SSH_HANDSHAKE_SECRET since it no longer reads the system sysconfig. Add an ExecStartPre that generates a random secret into ~/.config/openshell/gateway.env on first start, and an EnvironmentFile directive to read it back. 
--- openshell.spec | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/openshell.spec b/openshell.spec index dc41ac7ed..2ac3e2c3a 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260428102251900262.rpm.24.gf5a444a0%{?dist} +Release: 1.20260428105655427965.rpm.25.gee990b25%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 @@ -152,15 +152,19 @@ Wants=podman.socket [Service] Type=exec # Self-contained defaults for rootless operation. -# Set OPENSHELL_SSH_HANDSHAKE_SECRET before starting: -# systemctl --user edit openshell-gateway.service -# and add: -# [Service] -# Environment=OPENSHELL_SSH_HANDSHAKE_SECRET= # # WARNING: TLS is disabled. The gateway has NO authentication and # listens on all interfaces. For network-exposed setups, configure # mTLS certificates and remove OPENSHELL_DISABLE_TLS. +# +# The SSH handshake secret is auto-generated on first start into +# ~/.config/openshell/gateway.env (mode 0600). To override, edit +# that file or use: systemctl --user edit openshell-gateway.service + +# Auto-generate SSH handshake secret on first start if not present. +# %%E expands to $XDG_CONFIG_HOME (~/.config) in user units. 
+ExecStartPre=/bin/sh -c 'ENV=%%E/openshell/gateway.env; [ -f "$ENV" ] || { mkdir -p %%E/openshell && echo "OPENSHELL_SSH_HANDSHAKE_SECRET=$(od -An -tx1 -N32 /dev/urandom | tr -dc 0-9a-f)" > "$ENV" && chmod 600 "$ENV"; }' +EnvironmentFile=-%%E/openshell/gateway.env Environment=OPENSHELL_DRIVERS=podman Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest From 842afab3a4bf09f18865de5d7ae6bb0dfa423577 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 28 Apr 2026 14:15:40 -0500 Subject: [PATCH 04/18] fix(rpm): depend on podman.service instead of podman.socket Socket activation was unreliable on some Fedora systems, causing the gateway to fail with 'Connection refused' on the podman socket. Switch to Requires=podman.service + After=podman.service so the podman API server is running before the gateway starts. --- openshell.spec | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openshell.spec b/openshell.spec index 2ac3e2c3a..35bcecb8b 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260428105655427965.rpm.25.gee990b25%{?dist} +Release: 1.20260428113138606997.rpm.26.g63d59e2a%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 @@ -116,8 +116,8 @@ cat > %{buildroot}%{_unitdir}/%{name}-gateway.service << 'EOF' [Unit] Description=OpenShell Gateway Documentation=https://github.com/NVIDIA/OpenShell -After=network-online.target podman.socket -Wants=podman.socket +After=network-online.target podman.service +Requires=podman.service [Service] Type=exec @@ -139,15 +139,15 @@ EOF # --- Gateway systemd user unit (rootless Podman) --- # Installed to the systemd user unit directory so any user can run: -# systemctl --user start podman.socket # systemctl --user enable --now openshell-gateway.service +# This will automatically start podman.service via Requires= 
dependency. install -d %{buildroot}%{_userunitdir} cat > %{buildroot}%{_userunitdir}/%{name}-gateway.service << 'EOF' [Unit] Description=OpenShell Gateway (user) Documentation=https://github.com/NVIDIA/OpenShell -After=podman.socket -Wants=podman.socket +After=podman.service +Requires=podman.service [Service] Type=exec From 20f3e4b90ad9dc4f6b36ac1b2a4ce2aae080ebb8 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 28 Apr 2026 16:22:12 -0500 Subject: [PATCH 05/18] feat(ci): add supervisor-sideload as distinct build and release target Introduce supervisor-sideload image alongside the existing supervisor image in CI workflows, docker build scripts, and RPM packaging. The sideload variant (FROM scratch) is intended for Podman image-volume mounts used by the RPM-packaged gateway. --- .github/workflows/release-dev.yml | 11 +++++++++-- .github/workflows/release-tag.yml | 11 +++++++++-- openshell.spec | 6 +++--- tasks/docker.toml | 2 +- tasks/scripts/docker-build-image.sh | 12 ++++++++++-- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index 5563a67eb..b93a2ac7b 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -63,6 +63,13 @@ jobs: component: supervisor cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} + build-supervisor-sideload: + needs: [compute-versions] + uses: ./.github/workflows/docker-build.yml + with: + component: supervisor-sideload + cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} + build-cluster: needs: [compute-versions] uses: ./.github/workflows/docker-build.yml @@ -79,7 +86,7 @@ jobs: tag-ghcr-dev: name: Tag GHCR Images as Dev - needs: [build-gateway, build-supervisor, build-cluster] + needs: [build-gateway, build-supervisor, build-supervisor-sideload, build-cluster] runs-on: build-amd64 timeout-minutes: 10 steps: @@ -90,7 +97,7 @@ jobs: run: | set -euo pipefail 
REGISTRY="ghcr.io/nvidia/openshell" - for component in gateway supervisor cluster; do + for component in gateway supervisor supervisor-sideload cluster; do echo "Tagging ${REGISTRY}/${component}:${{ github.sha }} as dev..." docker buildx imagetools create \ --prefer-index=false \ diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml index 7df792cba..654501767 100644 --- a/.github/workflows/release-tag.yml +++ b/.github/workflows/release-tag.yml @@ -78,6 +78,13 @@ jobs: component: supervisor cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} + build-supervisor-sideload: + needs: [compute-versions] + uses: ./.github/workflows/docker-build.yml + with: + component: supervisor-sideload + cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} + build-cluster: needs: [compute-versions] uses: ./.github/workflows/docker-build.yml @@ -94,7 +101,7 @@ jobs: tag-ghcr-release: name: Tag GHCR Images for Release - needs: [compute-versions, build-gateway, build-supervisor, build-cluster, e2e] + needs: [compute-versions, build-gateway, build-supervisor, build-supervisor-sideload, build-cluster, e2e] runs-on: build-amd64 timeout-minutes: 10 steps: @@ -106,7 +113,7 @@ jobs: set -euo pipefail REGISTRY="ghcr.io/nvidia/openshell" VERSION="${{ needs.compute-versions.outputs.semver }}" - for component in gateway supervisor cluster; do + for component in gateway supervisor supervisor-sideload cluster; do echo "Tagging ${REGISTRY}/${component}:${{ github.sha }} as ${VERSION} and latest..." 
docker buildx imagetools create \ --prefer-index=false \ diff --git a/openshell.spec b/openshell.spec index 35bcecb8b..82926f40d 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260428113138606997.rpm.26.g63d59e2a%{?dist} +Release: 1.20260428141722522502.rpm.27.g09c857c1%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 @@ -167,7 +167,7 @@ ExecStartPre=/bin/sh -c 'ENV=%%E/openshell/gateway.env; [ -f "$ENV" ] || { mkdir EnvironmentFile=-%%E/openshell/gateway.env Environment=OPENSHELL_DRIVERS=podman Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db -Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest +Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor-sideload:latest Environment=OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest Environment=OPENSHELL_DISABLE_TLS=true ExecStart=/usr/bin/openshell-gateway @@ -215,7 +215,7 @@ OPENSHELL_DB_URL=sqlite:///var/lib/openshell/gateway.db OPENSHELL_DRIVERS=podman # Supervisor image mounted into sandbox containers. -OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest +OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor-sideload:latest # Default sandbox base image. 
OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest diff --git a/tasks/docker.toml b/tasks/docker.toml index b952d559c..d3b96a9e4 100644 --- a/tasks/docker.toml +++ b/tasks/docker.toml @@ -35,7 +35,7 @@ hide = true ["build:docker:supervisor-sideload"] description = "Build the supervisor sideload image (FROM scratch, for Podman image-volume mount)" -run = "tasks/scripts/docker-build-image.sh supervisor-output" +run = "tasks/scripts/docker-build-image.sh supervisor-sideload" hide = true ["build:docker:cluster"] diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh index 537b2a2cc..916217dbd 100755 --- a/tasks/scripts/docker-build-image.sh +++ b/tasks/scripts/docker-build-image.sh @@ -43,7 +43,7 @@ required_prebuilt_binaries() { gateway) echo "openshell-gateway" ;; - supervisor|cluster|supervisor-output) + supervisor|cluster|supervisor-sideload|supervisor-output) echo "openshell-sandbox" ;; esac @@ -90,7 +90,7 @@ ensure_prebuilt_binaries() { fi } -TARGET=${1:?"Usage: docker-build-image.sh [extra-args...]"} +TARGET=${1:?"Usage: docker-build-image.sh [extra-args...]"} shift DOCKERFILE="deploy/docker/Dockerfile.images" @@ -118,6 +118,14 @@ case "${TARGET}" in IMAGE_NAME="openshell/cluster" DOCKER_TARGET="cluster" ;; + supervisor-builder) + DOCKER_TARGET="supervisor-builder" + ;; + supervisor-sideload) + IS_FINAL_IMAGE=1 + IMAGE_NAME="openshell/supervisor-sideload" + DOCKER_TARGET="supervisor-output" + ;; supervisor-output) IS_FINAL_IMAGE=1 IMAGE_NAME="openshell/supervisor" From df4d13eabf3dcbde791caf979a5775bfc066d9b1 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Wed, 29 Apr 2026 09:01:28 -0500 Subject: [PATCH 06/18] feat(server): make gateway bind address configurable via OPENSHELL_BIND_HOST The gateway previously hardcoded 0.0.0.0 for all listeners. Add a --host / OPENSHELL_BIND_HOST parameter so the bind address can be set at runtime. 
The binary default remains 0.0.0.0 for backward compatibility, but the RPM sysconfig and systemd units default to 127.0.0.1 since the RPM targets single-host deployments that should not expose the unauthenticated API on the network. --- crates/openshell-server/src/cli.rs | 5 +++-- openshell.spec | 25 +++++++++++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index ae90c8b34..d61df1116 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -22,6 +22,7 @@ use crate::{run_server, tracing_bus::TracingLogBus}; #[command(version = openshell_core::VERSION)] #[command(about = "OpenShell gRPC/HTTP server", long_about = None)] struct Args { + /// Port to bind the server to. #[arg(long, default_value_t = DEFAULT_SERVER_PORT, env = "OPENSHELL_SERVER_PORT")] port: u16, @@ -332,7 +333,7 @@ async fn run_from_args(args: Args) -> Result<()> { args.port )); } - let health_bind = SocketAddr::from(([0, 0, 0, 0], args.health_port)); + let health_bind = SocketAddr::from((args.bind_address, args.health_port)); config = config.with_health_bind_address(health_bind); } @@ -349,7 +350,7 @@ async fn run_from_args(args: Args) -> Result<()> { args.health_port )); } - let metrics_bind = SocketAddr::from(([0, 0, 0, 0], args.metrics_port)); + let metrics_bind = SocketAddr::from((args.bind_address, args.metrics_port)); config = config.with_metrics_bind_address(metrics_bind); } diff --git a/openshell.spec b/openshell.spec index 82926f40d..b3131c658 100644 --- a/openshell.spec +++ b/openshell.spec @@ -153,9 +153,10 @@ Requires=podman.service Type=exec # Self-contained defaults for rootless operation. # -# WARNING: TLS is disabled. The gateway has NO authentication and -# listens on all interfaces. For network-exposed setups, configure -# mTLS certificates and remove OPENSHELL_DISABLE_TLS. +# WARNING: TLS is disabled. The gateway has NO authentication. 
+# It binds to localhost by default; if you change OPENSHELL_BIND_HOST +# to a non-loopback address, configure mTLS certificates and remove +# OPENSHELL_DISABLE_TLS. # # The SSH handshake secret is auto-generated on first start into # ~/.config/openshell/gateway.env (mode 0600). To override, edit @@ -165,6 +166,7 @@ Type=exec # %%E expands to $XDG_CONFIG_HOME (~/.config) in user units. ExecStartPre=/bin/sh -c 'ENV=%%E/openshell/gateway.env; [ -f "$ENV" ] || { mkdir -p %%E/openshell && echo "OPENSHELL_SSH_HANDSHAKE_SECRET=$(od -An -tx1 -N32 /dev/urandom | tr -dc 0-9a-f)" > "$ENV" && chmod 600 "$ENV"; }' EnvironmentFile=-%%E/openshell/gateway.env +Environment=OPENSHELL_BIND_HOST=127.0.0.1 Environment=OPENSHELL_DRIVERS=podman Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor-sideload:latest @@ -220,14 +222,21 @@ OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor-sideload:latest # Default sandbox base image. OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest +# Bind the gateway to localhost only (single-host use case). +# Change to 0.0.0.0 if the gateway must accept connections from +# other hosts on the network. +OPENSHELL_BIND_HOST=127.0.0.1 + # ---- SECURITY WARNING ---- # TLS is disabled by default for ease of initial setup. With TLS -# disabled, the gateway has NO authentication and listens on ALL -# network interfaces (0.0.0.0:8080). Any host that can reach this -# port has full unauthenticated access to the API, including sandbox -# creation, command execution, and credential retrieval. +# disabled, the gateway has NO authentication. The default bind +# address is localhost (127.0.0.1), limiting access to the local +# machine. 
If you change OPENSHELL_BIND_HOST to a non-loopback +# address, any host that can reach the gateway port has full +# unauthenticated access to the API, including sandbox creation, +# command execution, and credential retrieval. # -# For any deployment beyond single-user localhost testing: +# For any network-exposed deployment: # 1. Generate mTLS certificates (see OpenShell docs) # 2. Set OPENSHELL_TLS_CERT, OPENSHELL_TLS_KEY, OPENSHELL_TLS_CLIENT_CA # 3. Comment out OPENSHELL_DISABLE_TLS below From a2c7436e47da159edafbb2997935e5c5eef7cca0 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Wed, 29 Apr 2026 13:43:21 -0500 Subject: [PATCH 07/18] fix(cli): gateway destroy on non-Docker systems fails looking for docker.sock gateway add registrations on loopback were misclassified as embedded Docker containers, causing destroy/stop to attempt Docker socket connections on systems using Podman or other external runtimes. Add client_lifecycle_managed field to GatewayMetadata to distinguish gateway start deployments (client-managed) from gateway add registrations (externally-managed). Destroy/stop now skips container operations for externally-managed gateways. Legacy metadata without the field preserves existing behavior. Also gitignore RPM build artifacts (*.src.rpm, *.tar.gz, *.tar.xz). 
--- .gitignore | 6 ++ crates/openshell-bootstrap/src/metadata.rs | 71 ++++++++++++++++++++ crates/openshell-cli/src/completers.rs | 1 + crates/openshell-cli/src/main.rs | 1 + crates/openshell-cli/src/run.rs | 77 +++++++++++++++++++++- crates/openshell-vm/src/lib.rs | 1 + 6 files changed, 156 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 915c90d9d..1b37bfd49 100644 --- a/.gitignore +++ b/.gitignore @@ -206,5 +206,11 @@ rfc.md .worktrees .z3-trace +# RPM build artifacts +*.src.rpm +*.tar.gz +*.tar.xz +*.tar.bz2 + # Markdown/mermaid lint tooling deps scripts/lint-mermaid/node_modules/ diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs index beadcbeac..02e01b17a 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -65,6 +65,18 @@ pub struct GatewayMetadata { /// When set, tokens will include these scopes for fine-grained access control. #[serde(default, skip_serializing_if = "Option::is_none")] pub oidc_scopes: Option, + + /// Whether the CLI manages this gateway's full lifecycle (deploy, + /// stop, destroy). + /// + /// - `Some(true)` — deployed via `gateway start`; destroy/stop operate on + /// the underlying container or VM. + /// - `Some(false)` — registered via `gateway add`; destroy/stop only remove + /// the local registration metadata. + /// - `None` — legacy metadata written before this field existed; the CLI + /// falls back to the previous heuristic (`is_remote`). 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub client_lifecycle_managed: Option<bool>, } impl GatewayMetadata { @@ -153,6 +165,7 @@ pub fn create_gateway_metadata_with_host( remote_host, resolved_host, auth_mode: disable_tls.then(|| "plaintext".to_string()), + client_lifecycle_managed: Some(true), ..Default::default() } } @@ -502,6 +515,64 @@ mod tests { assert!(parsed.resolved_host.is_none()); } + #[test] + fn metadata_deserialize_without_client_lifecycle_managed_field() { + // Legacy metadata files won't have the client_lifecycle_managed field. + // Ensure backwards compatibility: defaults to None. + let json = r#"{ + "name": "test", + "gateway_endpoint": "https://127.0.0.1:8080", + "is_remote": false, + "gateway_port": 8080 + }"#; + let parsed: GatewayMetadata = serde_json::from_str(json).unwrap(); + assert_eq!(parsed.client_lifecycle_managed, None); + } + + #[test] + fn metadata_roundtrip_with_client_lifecycle_managed_field() { + let meta = GatewayMetadata { + name: "test".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + client_lifecycle_managed: Some(false), + }; + let json = serde_json::to_string(&meta).unwrap(); + assert!(json.contains(r#""client_lifecycle_managed":false"#)); + let parsed: GatewayMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed.client_lifecycle_managed, Some(false)); + } + + #[test] + fn metadata_omits_client_lifecycle_managed_when_none() { + let meta = GatewayMetadata { + name: "test".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + client_lifecycle_managed: None, + }; + let json = serde_json::to_string(&meta).unwrap(); +
assert!(!json.contains("client_lifecycle_managed")); + } + + #[test] + fn create_gateway_metadata_sets_client_lifecycle_managed_true() { + let meta = create_gateway_metadata("test", None, 8080); + assert_eq!(meta.client_lifecycle_managed, Some(true)); + } + #[test] fn local_gateway_metadata_with_gateway_host_override() { let meta = create_gateway_metadata_with_host( diff --git a/crates/openshell-cli/src/completers.rs b/crates/openshell-cli/src/completers.rs index d5d9a0a88..d8ba3ff93 100644 --- a/crates/openshell-cli/src/completers.rs +++ b/crates/openshell-cli/src/completers.rs @@ -178,6 +178,7 @@ mod tests { gateway_endpoint: "https://alpha.example.com".to_string(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() }, ) diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index ccad7a099..59f41e739 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -2976,6 +2976,7 @@ mod tests { gateway_endpoint: endpoint.to_string(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index eaadf7908..37488ba5f 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -895,6 +895,7 @@ fn plaintext_gateway_metadata( remote_host, resolved_host, auth_mode: Some("plaintext".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() } } @@ -1186,6 +1187,7 @@ pub async fn gateway_add( remote_host, resolved_host, auth_mode: Some("mtls".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() }; @@ -1211,6 +1213,7 @@ pub async fn gateway_add( gateway_endpoint: endpoint.clone(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() }; @@ -1736,10 +1739,20 @@ fn 
resolve_gateway_control_target_from( } match metadata { - Some(metadata) if metadata.is_remote => metadata.remote_host.map_or( + // Not client-managed (`gateway add`) — the gateway lifecycle is + // managed externally (e.g. systemd, Podman, bare-metal); only + // remove the local registration metadata on destroy/stop. + Some(ref m) if m.client_lifecycle_managed == Some(false) => { + GatewayControlTarget::ExternalRegistration + } + // Remote gateway with SSH destination — managed remote container. + Some(ref m) if m.is_remote => m.remote_host.clone().map_or( GatewayControlTarget::ExternalRegistration, GatewayControlTarget::Remote, ), + // Client-managed (`gateway start`) or legacy metadata (no + // `client_lifecycle_managed` field) — treat as a + // locally-managed container. _ => GatewayControlTarget::Local, } } @@ -5790,6 +5803,7 @@ mod tests { gateway_endpoint: endpoint.to_string(), is_remote: true, auth_mode: Some("cloudflare_jwt".to_string()), + client_lifecycle_managed: Some(false), ..Default::default() } } @@ -6093,6 +6107,65 @@ mod tests { } } + #[test] + fn resolve_gateway_control_target_non_managed_loopback_is_external() { + // A gateway registered via `gateway add http://localhost:8080` should + // be classified as an external registration, not a local container. + let metadata = GatewayMetadata { + name: "localhost".to_string(), + gateway_endpoint: "http://localhost:8080".to_string(), + is_remote: false, + gateway_port: 0, + remote_host: None, + resolved_host: None, + auth_mode: Some("plaintext".to_string()), + edge_team_domain: None, + edge_auth_url: None, + client_lifecycle_managed: Some(false), + }; + let target = resolve_gateway_control_target_from(Some(metadata), None); + assert!(matches!(target, GatewayControlTarget::ExternalRegistration)); + } + + #[test] + fn resolve_gateway_control_target_managed_gateway_is_local() { + // A gateway deployed via `gateway start` should be classified as local. 
+ let metadata = GatewayMetadata { + name: "openshell".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + client_lifecycle_managed: Some(true), + }; + let target = resolve_gateway_control_target_from(Some(metadata), None); + assert!(matches!(target, GatewayControlTarget::Local)); + } + + #[test] + fn resolve_gateway_control_target_legacy_metadata_defaults_to_local() { + // Legacy metadata without the `client_lifecycle_managed` field + // should preserve the existing behavior: non-remote → Local. + let metadata = GatewayMetadata { + name: "openshell".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + client_lifecycle_managed: None, + }; + let target = resolve_gateway_control_target_from(Some(metadata), None); + assert!(matches!(target, GatewayControlTarget::Local)); + } + #[test] fn gateway_select_uses_explicit_name_without_prompting() { let tmpdir = tempfile::tempdir().expect("create tmpdir"); @@ -6266,6 +6339,7 @@ mod tests { let metadata = load_gateway_metadata("127.0.0.1").expect("load stored gateway"); assert_eq!(metadata.auth_mode.as_deref(), Some("plaintext")); assert!(!metadata.is_remote); + assert_eq!(metadata.client_lifecycle_managed, Some(false)); assert_eq!(metadata.gateway_endpoint, "http://127.0.0.1:8080"); assert_eq!(load_active_gateway().as_deref(), Some("127.0.0.1")); }); @@ -6295,6 +6369,7 @@ mod tests { let metadata = load_gateway_metadata("dev-http").expect("load stored gateway"); assert_eq!(metadata.auth_mode.as_deref(), Some("plaintext")); assert!(!metadata.is_remote); + assert_eq!(metadata.client_lifecycle_managed, Some(false)); assert_eq!(metadata.gateway_endpoint, 
"http://gateway.example.com:8080"); assert_eq!(load_active_gateway().as_deref(), Some("dev-http")); }); diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index ba5d64663..2f8280b9c 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -1727,6 +1727,7 @@ fn bootstrap_gateway(rootfs: &Path, gateway_name: &str, gateway_port: u16) -> Re name: gateway_name.to_string(), gateway_endpoint: format!("https://127.0.0.1:{gateway_port}"), gateway_port, + client_lifecycle_managed: Some(true), ..Default::default() }; From bcdac0413692dcfe6e3350751130f74e34cc3393 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Wed, 29 Apr 2026 19:46:03 -0500 Subject: [PATCH 08/18] refactor(docker): consolidate supervisor and supervisor-sideload into single image The supervisor-sideload image (FROM scratch, binary only) was a separate build artifact for the Podman driver's image-volume mount. The Ubuntu-based supervisor image served the Docker driver's binary extraction path. Both use cases work with a single FROM scratch image: the Docker driver extracts the binary via the container archive API (never starts the container), and the Podman driver mounts the image filesystem directly. Consolidate to a single supervisor image (FROM scratch) and remove all supervisor-sideload references from CI workflows, build tasks, RPM spec, e2e tests, and architecture docs. The supervisor-output build target is retained as a backward-compat alias. 
--- .github/workflows/release-dev.yml | 11 ++------- .github/workflows/release-tag.yml | 11 ++------- architecture/podman-driver.md | 2 +- crates/openshell-driver-docker/src/lib.rs | 11 +++++---- deploy/docker/Dockerfile.images | 29 ++++++----------------- e2e/rust/e2e-podman.sh | 4 ++-- openshell.spec | 4 ++-- tasks/docker.toml | 13 +--------- tasks/scripts/docker-build-image.sh | 10 +++----- tasks/scripts/gateway-docker.sh | 2 +- tasks/test.toml | 2 +- 11 files changed, 28 insertions(+), 71 deletions(-) diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index b93a2ac7b..5563a67eb 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -63,13 +63,6 @@ jobs: component: supervisor cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} - build-supervisor-sideload: - needs: [compute-versions] - uses: ./.github/workflows/docker-build.yml - with: - component: supervisor-sideload - cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} - build-cluster: needs: [compute-versions] uses: ./.github/workflows/docker-build.yml @@ -86,7 +79,7 @@ jobs: tag-ghcr-dev: name: Tag GHCR Images as Dev - needs: [build-gateway, build-supervisor, build-supervisor-sideload, build-cluster] + needs: [build-gateway, build-supervisor, build-cluster] runs-on: build-amd64 timeout-minutes: 10 steps: @@ -97,7 +90,7 @@ jobs: run: | set -euo pipefail REGISTRY="ghcr.io/nvidia/openshell" - for component in gateway supervisor supervisor-sideload cluster; do + for component in gateway supervisor cluster; do echo "Tagging ${REGISTRY}/${component}:${{ github.sha }} as dev..." 
docker buildx imagetools create \ --prefer-index=false \ diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml index 654501767..7df792cba 100644 --- a/.github/workflows/release-tag.yml +++ b/.github/workflows/release-tag.yml @@ -78,13 +78,6 @@ jobs: component: supervisor cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} - build-supervisor-sideload: - needs: [compute-versions] - uses: ./.github/workflows/docker-build.yml - with: - component: supervisor-sideload - cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} - build-cluster: needs: [compute-versions] uses: ./.github/workflows/docker-build.yml @@ -101,7 +94,7 @@ jobs: tag-ghcr-release: name: Tag GHCR Images for Release - needs: [compute-versions, build-gateway, build-supervisor, build-supervisor-sideload, build-cluster, e2e] + needs: [compute-versions, build-gateway, build-supervisor, build-cluster, e2e] runs-on: build-amd64 timeout-minutes: 10 steps: @@ -113,7 +106,7 @@ jobs: set -euo pipefail REGISTRY="ghcr.io/nvidia/openshell" VERSION="${{ needs.compute-versions.outputs.semver }}" - for component in gateway supervisor supervisor-sideload cluster; do + for component in gateway supervisor cluster; do echo "Tagging ${REGISTRY}/${component}:${{ github.sha }} as ${VERSION} and latest..." docker buildx imagetools create \ --prefer-index=false \ diff --git a/architecture/podman-driver.md b/architecture/podman-driver.md index 155937a77..a577b1e8d 100644 --- a/architecture/podman-driver.md +++ b/architecture/podman-driver.md @@ -100,7 +100,7 @@ sequenceDiagram C->>C: entrypoint: /opt/openshell/bin/openshell-sandbox ``` -The supervisor image is a `FROM scratch` image containing only the prebuilt `openshell-sandbox` binary. It is built by the `supervisor-output` target in `deploy/docker/Dockerfile.images`. 
The `image_volumes` field in the container spec mounts this image's filesystem at `/opt/openshell/bin` with `rw: false`, making it a read-only overlay that the sandbox cannot tamper with. +The supervisor image is a `FROM scratch` image containing only the prebuilt `openshell-sandbox` binary. It is built by the `supervisor` target in `deploy/docker/Dockerfile.images`. The `image_volumes` field in the container spec mounts this image's filesystem at `/opt/openshell/bin` with `rw: false`, making it a read-only overlay that the sandbox cannot tamper with. ## Network Model diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index 8b8df5b89..5a806c343 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -61,8 +61,9 @@ const HOST_OPENSHELL_INTERNAL_HOSTS_ENTRY: &str = "host.openshell.internal:127.0 /// explicit `--docker-supervisor-bin` override or local build is available. const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor"; -/// Path to the supervisor binary inside the `openshell/supervisor` image. -const SUPERVISOR_IMAGE_BINARY_PATH: &str = "/usr/local/bin/openshell-sandbox"; +/// Path to the supervisor binary inside the `openshell/supervisor` image +/// (a `FROM scratch` image containing only the binary). +const SUPERVISOR_IMAGE_BINARY_PATH: &str = "/openshell-sandbox"; /// Return the default `ghcr.io/nvidia/openshell/supervisor:` reference /// used when no supervisor binary override is provided. @@ -1431,8 +1432,8 @@ fn linux_supervisor_candidates(daemon_arch: &str) -> Vec { } /// Pull the supervisor image (if not already local), extract -/// `/usr/local/bin/openshell-sandbox` to a host cache keyed by the image's -/// content digest, and return the cache path. +/// `/openshell-sandbox` to a host cache keyed by the image's content +/// digest, and return the cache path. 
/// /// The extraction is atomic: the binary is written to a sibling temp file /// inside the digest-keyed directory and renamed into place, so concurrent @@ -1528,7 +1529,7 @@ async fn extract_supervisor_binary_bytes(docker: &Docker, image: &str) -> CoreRe ), ContainerCreateBody { image: Some(image.to_string()), - entrypoint: Some(vec!["/bin/true".to_string()]), + entrypoint: Some(vec!["/openshell-sandbox".to_string()]), cmd: Some(Vec::new()), ..Default::default() }, diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images index ebe5e267e..ab1c7f989 100644 --- a/deploy/docker/Dockerfile.images +++ b/deploy/docker/Dockerfile.images @@ -7,9 +7,8 @@ # # Targets: # gateway Final gateway image -# supervisor Final supervisor image +# supervisor Final supervisor image (FROM scratch, binary only) # cluster Final cluster image -# supervisor-output Minimal stage exporting only the supervisor binary # # Rust binaries are built natively before the image build and staged at: # deploy/docker/.build/prebuilt-binaries//openshell-{gateway,sandbox} @@ -38,10 +37,7 @@ ARG TARGETARCH # download-artifact, which strip exec perms during the roundtrip. COPY --chmod=755 deploy/docker/.build/prebuilt-binaries/${TARGETARCH}/openshell-sandbox /build/out/openshell-sandbox -# Minimal extraction stage for fast-deploy: exports only the supervisor -# binary (~20-40 MB) instead of the entire build environment (~968 MB). 
-FROM scratch AS supervisor-output -COPY --from=supervisor-binary /build/out/openshell-sandbox /openshell-sandbox + # --------------------------------------------------------------------------- # Final gateway image @@ -71,22 +67,11 @@ CMD ["--port", "8080"] # --------------------------------------------------------------------------- # Final supervisor image # --------------------------------------------------------------------------- -FROM nvcr.io/nvidia/base/ubuntu:noble-20251013 AS supervisor - -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates && \ - apt-get install -y --only-upgrade gpgv && \ - rm -rf /var/lib/apt/lists/* - -RUN useradd --create-home --user-group openshell - -WORKDIR /app - -COPY --from=supervisor-binary /build/out/openshell-sandbox /usr/local/bin/ - -USER openshell - -ENTRYPOINT ["openshell-sandbox"] +# Minimal FROM scratch image containing only the supervisor binary. +# Used by both the Docker driver (binary extraction) and the Podman driver +# (OCI image volume mount at /opt/openshell/bin). +FROM scratch AS supervisor +COPY --from=supervisor-binary /build/out/openshell-sandbox /openshell-sandbox # --------------------------------------------------------------------------- # Cluster asset stages diff --git a/e2e/rust/e2e-podman.sh b/e2e/rust/e2e-podman.sh index 38c6e6b7c..e2681e008 100755 --- a/e2e/rust/e2e-podman.sh +++ b/e2e/rust/e2e-podman.sh @@ -20,7 +20,7 @@ # # Prerequisites: # - Rootless Podman service running (systemctl --user start podman.socket) -# - Supervisor sideload image built (mise run build:docker:supervisor-sideload) +# - Supervisor image built (mise run build:docker:supervisor) # - Sandbox base image available locally set -euo pipefail @@ -68,7 +68,7 @@ SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-}" # Verify the supervisor image exists locally. if ! podman image exists "${SUPERVISOR_IMAGE}" 2>/dev/null; then echo "ERROR: supervisor image '${SUPERVISOR_IMAGE}' not found locally." 
- echo "Build it with: mise run build:docker:supervisor-sideload" + echo "Build it with: mise run build:docker:supervisor" exit 1 fi diff --git a/openshell.spec b/openshell.spec index b3131c658..88c2b1b40 100644 --- a/openshell.spec +++ b/openshell.spec @@ -169,7 +169,7 @@ EnvironmentFile=-%%E/openshell/gateway.env Environment=OPENSHELL_BIND_HOST=127.0.0.1 Environment=OPENSHELL_DRIVERS=podman Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db -Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor-sideload:latest +Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest Environment=OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest Environment=OPENSHELL_DISABLE_TLS=true ExecStart=/usr/bin/openshell-gateway @@ -217,7 +217,7 @@ OPENSHELL_DB_URL=sqlite:///var/lib/openshell/gateway.db OPENSHELL_DRIVERS=podman # Supervisor image mounted into sandbox containers. -OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor-sideload:latest +OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest # Default sandbox base image. 
OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest diff --git a/tasks/docker.toml b/tasks/docker.toml index d3b96a9e4..78796d868 100644 --- a/tasks/docker.toml +++ b/tasks/docker.toml @@ -9,7 +9,6 @@ depends = [ "build:docker:gateway", "build:docker:cluster", "build:docker:supervisor", - "build:docker:supervisor-sideload", ] hide = true @@ -29,15 +28,10 @@ run = "tasks/scripts/docker-build-image.sh gateway" hide = true ["build:docker:supervisor"] -description = "Build the standalone supervisor Docker image (Ubuntu-based, for K8s pods)" +description = "Build the supervisor image (FROM scratch, binary only)" run = "tasks/scripts/docker-build-image.sh supervisor" hide = true -["build:docker:supervisor-sideload"] -description = "Build the supervisor sideload image (FROM scratch, for Podman image-volume mount)" -run = "tasks/scripts/docker-build-image.sh supervisor-sideload" -hide = true - ["build:docker:cluster"] description = "Build the k3s cluster image (component images pulled at runtime from registry)" run = "tasks/scripts/docker-build-image.sh cluster" @@ -53,11 +47,6 @@ description = "Alias for build:docker:supervisor" depends = ["build:docker:supervisor"] hide = true -["docker:build:supervisor-sideload"] -description = "Alias for build:docker:supervisor-sideload" -depends = ["build:docker:supervisor-sideload"] -hide = true - ["docker:build:cluster"] description = "Alias for build:docker:cluster" depends = ["build:docker:cluster"] diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh index 916217dbd..997a631ad 100755 --- a/tasks/scripts/docker-build-image.sh +++ b/tasks/scripts/docker-build-image.sh @@ -90,7 +90,7 @@ ensure_prebuilt_binaries() { fi } -TARGET=${1:?"Usage: docker-build-image.sh [extra-args...]"} +TARGET=${1:?"Usage: docker-build-image.sh [extra-args...]"} shift DOCKERFILE="deploy/docker/Dockerfile.images" @@ -121,15 +121,11 @@ case "${TARGET}" in supervisor-builder) 
DOCKER_TARGET="supervisor-builder" ;; - supervisor-sideload) - IS_FINAL_IMAGE=1 - IMAGE_NAME="openshell/supervisor-sideload" - DOCKER_TARGET="supervisor-output" - ;; supervisor-output) + # Backward-compat alias: same as "supervisor". IS_FINAL_IMAGE=1 IMAGE_NAME="openshell/supervisor" - DOCKER_TARGET="supervisor-output" + DOCKER_TARGET="supervisor" ;; *) echo "Error: unsupported target '${TARGET}'" >&2 diff --git a/tasks/scripts/gateway-docker.sh b/tasks/scripts/gateway-docker.sh index a481692a3..269933c96 100644 --- a/tasks/scripts/gateway-docker.sh +++ b/tasks/scripts/gateway-docker.sh @@ -137,7 +137,7 @@ if [[ "${HOST_OS}" == "Linux" && "${HOST_ARCH}" == "${DAEMON_ARCH}" ]]; then cp "${ROOT}/target/${SUPERVISOR_TARGET}/debug/openshell-sandbox" "${SUPERVISOR_BIN}" else # Cross-compile through the prebuilt-binary staging helper, then use the - # supervisor-output stage to extract just the openshell-sandbox binary. + # supervisor stage to extract just the openshell-sandbox binary. # # This task is gated on a working Docker daemon above, so pin the # container-engine helper to docker — otherwise it auto-detects podman diff --git a/tasks/test.toml b/tasks/test.toml index 90ef47190..df4a1db20 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -50,7 +50,7 @@ run = "uv run pytest -o python_files='test_*.py' -m gpu -n ${E2E_PARALLEL:-1} e2 ["e2e:podman"] description = "Start a Podman-backed gateway and run smoke e2e (requires rootless Podman; pass -- --port=N to override)" -depends = ["build:docker:supervisor-sideload"] +depends = ["build:docker:supervisor"] run = "e2e/rust/e2e-podman.sh" ["e2e:vm"] From 7ac190cb35d98e6afc9bf8a3686b005b2517aad7 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 30 Apr 2026 14:09:55 -0500 Subject: [PATCH 09/18] feat(podman): enable mTLS by default for RPM gateway packaging The Podman driver previously ran with TLS disabled, and the RPM bound the gateway to 127.0.0.1 to limit exposure. 
However, loopback-bound services are unreachable from Podman containers in both bridge and pasta networking modes, breaking sandbox-to-gateway communication. Instead of fighting the container networking model, solve the security concern directly: bind to 0.0.0.0 with mTLS enabled by default. PKI bootstrap: - Add deploy/rpm/init-pki.sh that generates a self-signed CA, server cert, and client cert using OpenSSL on first gateway start - Called from ExecStartPre in the systemd user unit (idempotent) - Client certs copied to CLI auto-discovery directory so the CLI connects with mTLS without manual setup Podman driver TLS injection: - Add tls_ca/tls_cert/tls_key config fields to PodmanComputeConfig - Bind-mount client certs into sandbox containers at /etc/openshell/tls/client/ (read-only) - Set OPENSHELL_TLS_CA/CERT/KEY env vars so the supervisor connects with mTLS - Auto-detect endpoint scheme (http vs https) based on TLS config RPM packaging: - Revert OPENSHELL_BIND_HOST from 127.0.0.1 to 0.0.0.0 - Remove OPENSHELL_DISABLE_TLS=true from defaults - Add TLS cert path env vars for gateway and Podman driver - Add init-pki.sh to /usr/libexec/openshell/ - Add GATEWAY-CONFIG.md documentation to /usr/share/doc/ - Add openssl as a dependency --- architecture/podman-driver.md | 12 ++ architecture/podman-rootless-networking.md | 37 +++- crates/openshell-driver-podman/src/client.rs | 12 ++ crates/openshell-driver-podman/src/config.rs | 22 ++ .../openshell-driver-podman/src/container.rs | 128 ++++++++++- crates/openshell-driver-podman/src/driver.rs | 83 +++++--- crates/openshell-driver-podman/src/main.rs | 15 ++ crates/openshell-server/src/lib.rs | 19 ++ deploy/rpm/GATEWAY-CONFIG.md | 201 ++++++++++++++++++ deploy/rpm/init-pki.sh | 121 +++++++++++ openshell.spec | 79 ++++--- 11 files changed, 657 insertions(+), 72 deletions(-) create mode 100644 deploy/rpm/GATEWAY-CONFIG.md create mode 100755 deploy/rpm/init-pki.sh diff --git a/architecture/podman-driver.md 
b/architecture/podman-driver.md index a577b1e8d..c6594f76a 100644 --- a/architecture/podman-driver.md +++ b/architecture/podman-driver.md @@ -102,6 +102,18 @@ sequenceDiagram The supervisor image is a `FROM scratch` image containing only the prebuilt `openshell-sandbox` binary. It is built by the `supervisor` target in `deploy/docker/Dockerfile.images`. The `image_volumes` field in the container spec mounts this image's filesystem at `/opt/openshell/bin` with `rw: false`, making it a read-only overlay that the sandbox cannot tamper with. +## TLS + +When the Podman driver's TLS configuration is set (`tls_ca`, `tls_cert`, `tls_key` in `PodmanComputeConfig`), the driver: + +1. Switches the auto-detected endpoint scheme from `http://` to `https://` +2. Bind-mounts the client cert files (read-only) into the container at `/etc/openshell/tls/client/` +3. Sets `OPENSHELL_TLS_CA`, `OPENSHELL_TLS_CERT`, `OPENSHELL_TLS_KEY` env vars pointing to the container-side paths + +The supervisor reads these env vars and uses them to establish an mTLS connection back to the gateway. + +The RPM packaging auto-generates a self-signed PKI on first start via `init-pki.sh`. Client certs are placed in the CLI auto-discovery directory (`~/.config/openshell/gateways/openshell/mtls/`) so the CLI connects with mTLS without manual configuration. See `deploy/rpm/GATEWAY-CONFIG.md` for the full RPM configuration reference. + ## Network Model Sandbox network isolation uses a two-layer approach: a Podman bridge network for container-to-host communication, and a nested network namespace (created by the supervisor) for sandbox process isolation. 
diff --git a/architecture/podman-rootless-networking.md b/architecture/podman-rootless-networking.md index d13b9ca84..5f35b3ba2 100644 --- a/architecture/podman-rootless-networking.md +++ b/architecture/podman-rootless-networking.md @@ -306,37 +306,52 @@ Supervisor proxy (10.200.0.1:3128 in container netns) ### Supervisor gRPC Callback to Gateway +The Podman driver auto-detects the callback endpoint scheme based on +whether TLS client certificates are configured. When the RPM's +auto-generated PKI is in place, the endpoint is +`https://host.containers.internal:8080` and the supervisor connects +with mTLS. Without TLS configuration, it falls back to +`http://host.containers.internal:8080`. + ```text Supervisor (container netns, 10.89.x.2) | - 1. gRPC connect to http://host.containers.internal:8080 + 1. mTLS connect to https://host.containers.internal:8080 (resolves to 169.254.1.2:8080 via /etc/hosts) + Client cert bind-mounted from host at /etc/openshell/tls/client/ | 2. Routed through container default gateway (bridge) | 3. Pasta translates: L2 frame -> host L4 socket - (pasta host-gateway mapping: 169.254.1.2 -> 127.0.0.1) | - 4. Host TCP socket connects to 127.0.0.1:8080 + 4. Host TCP socket connects to gateway (0.0.0.0:8080) | -Gateway (host, port 8080) +Gateway (host, 0.0.0.0:8080, mTLS enabled) | - 5. ConnectSupervisor bidirectional stream established - 6. Heartbeats every N seconds (gateway sends interval in SessionAccepted, default 15s) - 7. Reconnects with exponential backoff (1s initial, 30s max) on failure - 8. Same gRPC channel reused for RelayStream calls (no new TLS handshake) + 5. TLS handshake: server presents server cert, client presents client cert + 6. ConnectSupervisor bidirectional stream established + 7. Heartbeats every N seconds (gateway sends interval in SessionAccepted, default 15s) + 8. Reconnects with exponential backoff (1s initial, 30s max) on failure + 9. 
Same gRPC channel reused for RelayStream calls (no new TLS handshake) ``` +The gateway binds to `0.0.0.0` by default in the RPM packaging. mTLS +prevents unauthenticated access even though the gateway is reachable +from the network. Client certificates are auto-generated by +`init-pki.sh` on first start and bind-mounted into sandbox containers +by the Podman driver. See `deploy/rpm/GATEWAY-CONFIG.md` for the full +configuration reference. + ## Differences from the Kubernetes Driver | Aspect | Kubernetes | Podman (rootless pasta) | |--------|-----------|----------------------| | Container/Pod IP | Routable cluster-wide | Non-routable (10.89.x.x inside user namespace) | -| Network reachability | Pod IPs reachable from gateway | Bridge not routable from host; requires pasta port forwarding or `host.containers.internal` | -| Sandbox -> Gateway | Direct TCP to K8s service IP | `host.containers.internal` (169.254.1.2 via pasta) | +| Network reachability | Pod IPs reachable from gateway | Bridge not routable from host; requires `host.containers.internal` | +| Sandbox -> Gateway | Direct TCP to K8s service IP | `host.containers.internal` via bridge + pasta | | SSH transport | Reverse gRPC relay (`ConnectSupervisor` + `RelayStream`) -- same mechanism as Podman | Reverse gRPC relay (`ConnectSupervisor` + `RelayStream`) | | Port publishing | Not needed (routable IPs) | Ephemeral host port via pasta port forwarding | -| TLS | mTLS via K8s secrets | Disabled by default (loopback-only, `--disable-tls`) | +| TLS | mTLS via K8s secrets | mTLS via auto-generated PKI (RPM default) or `--disable-tls` | | DNS | Kubernetes CoreDNS | Podman bridge DNS (aardvark-dns, `dns_enabled: true`) | | Network policy | K8s NetworkPolicy (ingress restricted to gateway) | iptables inside inner sandbox netns | | Supervisor delivery | hostPath volume from k3s node | OCI image volume mount (FROM scratch image) | diff --git a/crates/openshell-driver-podman/src/client.rs 
b/crates/openshell-driver-podman/src/client.rs index 12ea0901f..69bfd69c0 100644 --- a/crates/openshell-driver-podman/src/client.rs +++ b/crates/openshell-driver-podman/src/client.rs @@ -234,6 +234,18 @@ pub struct HostInfo { pub cgroup_version: String, #[serde(default)] pub network_backend: String, + #[serde(default)] + pub security: SecurityInfo, +} + +/// Security-related fields from the Podman system info response. +/// +/// Podman returns `host.security.rootless: true` when the daemon is +/// running without root privileges (rootless mode). +#[derive(Debug, Clone, Default, serde::Deserialize)] +pub struct SecurityInfo { + #[serde(default)] + pub rootless: bool, } // ── Client ─────────────────────────────────────────────────────────────── diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index 1586002ab..0c31e09b7 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -102,9 +102,25 @@ pub struct PodmanComputeConfig { /// Mounted read-only into sandbox containers at /opt/openshell/bin /// using Podman's `type=image` mount. pub supervisor_image: String, + /// Host path to the CA certificate for sandbox mTLS. + /// + /// When all three TLS paths (`tls_ca`, `tls_cert`, `tls_key`) are set, + /// the driver bind-mounts them into sandbox containers and switches the + /// auto-detected endpoint from `http://` to `https://`. + pub tls_ca: Option, + /// Host path to the client certificate for sandbox mTLS. + pub tls_cert: Option, + /// Host path to the client private key for sandbox mTLS. + pub tls_key: Option, } impl PodmanComputeConfig { + /// Returns `true` when all three TLS paths are configured. + #[must_use] + pub fn tls_enabled(&self) -> bool { + self.tls_ca.is_some() && self.tls_cert.is_some() && self.tls_key.is_some() + } + /// Resolve the default socket path from the environment. 
/// /// - **macOS**: `$HOME/.local/share/containers/podman/machine/podman.sock` @@ -148,6 +164,9 @@ impl Default for PodmanComputeConfig { ssh_handshake_skew_secs: DEFAULT_SSH_HANDSHAKE_SKEW_SECS, stop_timeout_secs: DEFAULT_STOP_TIMEOUT_SECS, supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), + tls_ca: None, + tls_cert: None, + tls_key: None, } } } @@ -168,6 +187,9 @@ impl std::fmt::Debug for PodmanComputeConfig { .field("ssh_handshake_skew_secs", &self.ssh_handshake_skew_secs) .field("stop_timeout_secs", &self.stop_timeout_secs) .field("supervisor_image", &self.supervisor_image) + .field("tls_ca", &self.tls_ca) + .field("tls_cert", &self.tls_cert) + .field("tls_key", &self.tls_key) .finish() } } diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index cc7bbc519..fd9f30a53 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -25,6 +25,11 @@ const CONTAINER_PREFIX: &str = "openshell-sandbox-"; /// Volume name prefix. const VOLUME_PREFIX: &str = "openshell-sandbox-"; +/// Container-side mount paths for client TLS materials. +const TLS_CA_MOUNT_PATH: &str = "/etc/openshell/tls/client/ca.crt"; +const TLS_CERT_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.crt"; +const TLS_KEY_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.key"; + /// Build a Podman container name from the sandbox name. #[must_use] pub fn container_name(sandbox_name: &str) -> String { @@ -260,6 +265,15 @@ fn build_env( env.insert("OPENSHELL_CONTAINER_IMAGE".into(), image.to_string()); env.insert("OPENSHELL_SANDBOX_COMMAND".into(), "sleep infinity".into()); + // 3. TLS client cert paths (when mTLS is enabled). These point to + // the container-side mount paths where the cert files are + // bind-mounted from the host. 
+ if config.tls_enabled() { + env.insert("OPENSHELL_TLS_CA".into(), TLS_CA_MOUNT_PATH.into()); + env.insert("OPENSHELL_TLS_CERT".into(), TLS_CERT_MOUNT_PATH.into()); + env.insert("OPENSHELL_TLS_KEY".into(), TLS_KEY_MOUNT_PATH.into()); + } + env } @@ -462,12 +476,43 @@ pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi // directory does not exist on the host, so the mkdir inside the container // fails with EPERM. A private tmpfs gives the supervisor its own writable // /run/netns without needing host filesystem access. - mounts: vec![Mount { - kind: "tmpfs".into(), - source: "tmpfs".into(), - destination: "/run/netns".into(), - options: vec!["rw".into(), "nosuid".into(), "nodev".into()], - }], + mounts: { + let mut m = vec![Mount { + kind: "tmpfs".into(), + source: "tmpfs".into(), + destination: "/run/netns".into(), + options: vec!["rw".into(), "nosuid".into(), "nodev".into()], + }]; + // Bind-mount client TLS materials into the container when mTLS + // is enabled. The supervisor reads these via OPENSHELL_TLS_CA, + // OPENSHELL_TLS_CERT, and OPENSHELL_TLS_KEY env vars (set in + // build_env above) to establish an mTLS connection back to the + // gateway. + if let (Some(ca), Some(cert), Some(key)) = + (&config.tls_ca, &config.tls_cert, &config.tls_key) + { + let ro = vec!["ro".into(), "rbind".into()]; + m.push(Mount { + kind: "bind".into(), + source: ca.display().to_string(), + destination: TLS_CA_MOUNT_PATH.into(), + options: ro.clone(), + }); + m.push(Mount { + kind: "bind".into(), + source: cert.display().to_string(), + destination: TLS_CERT_MOUNT_PATH.into(), + options: ro.clone(), + }); + m.push(Mount { + kind: "bind".into(), + source: key.display().to_string(), + destination: TLS_KEY_MOUNT_PATH.into(), + options: ro, + }); + } + m + }, // Publish the SSH port with host_port=0 to get an ephemeral host port. 
// In rootless Podman the bridge network (10.89.x.x) is not routable from // the host, so we must use the published host port on 127.0.0.1 instead. @@ -834,4 +879,75 @@ mod tests { "image volume should be read-only" ); } + + #[test] + fn container_spec_includes_tls_mounts_when_configured() { + let sandbox = test_sandbox("tls-id", "tls-name"); + let mut config = test_config(); + config.tls_ca = Some(std::path::PathBuf::from("/host/ca.crt")); + config.tls_cert = Some(std::path::PathBuf::from("/host/tls.crt")); + config.tls_key = Some(std::path::PathBuf::from("/host/tls.key")); + + let spec = build_container_spec(&sandbox, &config); + + // Verify TLS env vars are set. + let env_map = spec["env"].as_object().expect("env should be an object"); + assert_eq!( + env_map.get("OPENSHELL_TLS_CA").and_then(|v| v.as_str()), + Some("/etc/openshell/tls/client/ca.crt"), + ); + assert_eq!( + env_map.get("OPENSHELL_TLS_CERT").and_then(|v| v.as_str()), + Some("/etc/openshell/tls/client/tls.crt"), + ); + assert_eq!( + env_map.get("OPENSHELL_TLS_KEY").and_then(|v| v.as_str()), + Some("/etc/openshell/tls/client/tls.key"), + ); + + // Verify bind mounts exist for all three cert files. 
+ let mounts = spec["mounts"] + .as_array() + .expect("mounts should be an array"); + let bind_dests: Vec<&str> = mounts + .iter() + .filter(|m| m["type"].as_str() == Some("bind")) + .filter_map(|m| m["destination"].as_str()) + .collect(); + assert!( + bind_dests.contains(&"/etc/openshell/tls/client/ca.crt"), + "should bind-mount CA cert" + ); + assert!( + bind_dests.contains(&"/etc/openshell/tls/client/tls.crt"), + "should bind-mount client cert" + ); + assert!( + bind_dests.contains(&"/etc/openshell/tls/client/tls.key"), + "should bind-mount client key" + ); + } + + #[test] + fn container_spec_omits_tls_without_config() { + let sandbox = test_sandbox("notls-id", "notls-name"); + let config = test_config(); + + let spec = build_container_spec(&sandbox, &config); + + let env_map = spec["env"].as_object().expect("env should be an object"); + assert!( + env_map.get("OPENSHELL_TLS_CA").is_none(), + "TLS env vars should not be set without TLS config" + ); + + let mounts = spec["mounts"] + .as_array() + .expect("mounts should be an array"); + let bind_count = mounts + .iter() + .filter(|m| m["type"].as_str() == Some("bind")) + .count(); + assert_eq!(bind_count, 0, "no bind mounts without TLS config"); + } } diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index ae9492d74..3dc583545 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -78,7 +78,7 @@ impl PodmanComputeDriver { // Verify connectivity. client.ping().await?; - // Verify cgroups v2 and log system info. + // Verify cgroups v2, detect rootless mode, and log system info. 
match client.system_info().await { Ok(info) => { if info.host.cgroup_version != "v2" { @@ -92,6 +92,7 @@ impl PodmanComputeDriver { info!( cgroup_version = %info.host.cgroup_version, network_backend = %info.host.network_backend, + rootless = info.host.security.rootless, "Connected to Podman" ); } @@ -124,16 +125,24 @@ impl PodmanComputeDriver { // Auto-detect the gRPC callback endpoint when not explicitly // configured. Sandbox containers use host.containers.internal // (injected via hostadd with host-gateway in the container spec) - // to reach the gateway server on the host. This works in both - // rootful and rootless Podman — the bridge gateway IP does NOT - // work in rootless mode because it lives inside the user - // namespace, not on the host. + // to reach the gateway server on the host. The scheme is + // determined by whether TLS client certs are configured: when + // all three TLS paths are set, the endpoint uses https so the + // supervisor connects with mTLS. if config.grpc_endpoint.is_empty() { - config.grpc_endpoint = - format!("http://host.containers.internal:{}", config.gateway_port); + let scheme = if config.tls_enabled() { + "https" + } else { + "http" + }; + config.grpc_endpoint = format!( + "{scheme}://host.containers.internal:{}", + config.gateway_port + ); info!( grpc_endpoint = %config.grpc_endpoint, - "Auto-detected gRPC endpoint via host.containers.internal" + tls = config.tls_enabled(), + "Auto-detected gRPC endpoint" ); } @@ -557,51 +566,67 @@ mod tests { assert!(matches!(err, ComputeDriverError::Message(_))); } - // ── gateway_port / grpc_endpoint auto-detection ─────────────────────── + // ── grpc_endpoint auto-detection ─────────────────────────────────── // // PodmanComputeDriver::new() fills grpc_endpoint when it is empty. - // These tests use for_tests() (which skips the Podman socket handshake) - // to verify the endpoint that ends up in the config — and therefore in - // OPENSHELL_ENDPOINT inside every sandbox container. 
+ // The scheme (http vs https) depends on whether TLS client certs are + // configured. These tests simulate the auto-detection logic. #[test] - fn grpc_endpoint_auto_detected_from_gateway_port() { - let config = PodmanComputeConfig { + fn grpc_endpoint_http_without_tls() { + let mut cfg = PodmanComputeConfig { gateway_port: 8081, ..PodmanComputeConfig::default() }; - // Simulate what new() does once the socket/network checks pass. - let mut cfg = config; if cfg.grpc_endpoint.is_empty() { - cfg.grpc_endpoint = format!("http://host.containers.internal:{}", cfg.gateway_port); + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); } assert_eq!(cfg.grpc_endpoint, "http://host.containers.internal:8081"); } #[test] - fn grpc_endpoint_auto_detected_uses_default_port_when_gateway_port_is_default() { - let config = PodmanComputeConfig::default(); - assert_eq!( - config.gateway_port, - openshell_core::config::DEFAULT_SERVER_PORT - ); - let mut cfg = config; + fn grpc_endpoint_https_with_tls() { + let mut cfg = PodmanComputeConfig { + gateway_port: 8080, + tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), + tls_cert: Some(std::path::PathBuf::from("/tls/tls.crt")), + tls_key: Some(std::path::PathBuf::from("/tls/tls.key")), + ..PodmanComputeConfig::default() + }; if cfg.grpc_endpoint.is_empty() { - cfg.grpc_endpoint = format!("http://host.containers.internal:{}", cfg.gateway_port); + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); + } + assert_eq!(cfg.grpc_endpoint, "https://host.containers.internal:8080"); + } + + #[test] + fn grpc_endpoint_partial_tls_falls_back_to_http() { + let mut cfg = PodmanComputeConfig { + gateway_port: 8080, + tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), + // tls_cert and tls_key not set — incomplete TLS config. 
+ ..PodmanComputeConfig::default() + }; + assert!(!cfg.tls_enabled()); + if cfg.grpc_endpoint.is_empty() { + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); } assert_eq!(cfg.grpc_endpoint, "http://host.containers.internal:8080"); } #[test] - fn explicit_grpc_endpoint_takes_precedence_over_gateway_port() { - let config = PodmanComputeConfig { + fn explicit_grpc_endpoint_takes_precedence() { + let mut cfg = PodmanComputeConfig { grpc_endpoint: "https://gateway.internal:9000".to_string(), gateway_port: 8081, ..PodmanComputeConfig::default() }; - let mut cfg = config; if cfg.grpc_endpoint.is_empty() { - cfg.grpc_endpoint = format!("http://host.containers.internal:{}", cfg.gateway_port); + let scheme = if cfg.tls_enabled() { "https" } else { "http" }; + cfg.grpc_endpoint = format!("{scheme}://host.containers.internal:{}", cfg.gateway_port); } assert_eq!(cfg.grpc_endpoint, "https://gateway.internal:9000"); } diff --git a/crates/openshell-driver-podman/src/main.rs b/crates/openshell-driver-podman/src/main.rs index 6020de5bd..c8845e524 100644 --- a/crates/openshell-driver-podman/src/main.rs +++ b/crates/openshell-driver-podman/src/main.rs @@ -86,6 +86,18 @@ struct Args { /// OCI image containing the openshell-sandbox supervisor binary. #[arg(long, env = "OPENSHELL_SUPERVISOR_IMAGE")] supervisor_image: String, + + /// Host path to the CA certificate for sandbox mTLS. + #[arg(long, env = "OPENSHELL_PODMAN_TLS_CA")] + podman_tls_ca: Option, + + /// Host path to the client certificate for sandbox mTLS. + #[arg(long, env = "OPENSHELL_PODMAN_TLS_CERT")] + podman_tls_cert: Option, + + /// Host path to the client private key for sandbox mTLS. 
+ #[arg(long, env = "OPENSHELL_PODMAN_TLS_KEY")] + podman_tls_key: Option, } #[tokio::main] @@ -115,6 +127,9 @@ async fn main() -> Result<()> { ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, stop_timeout_secs: args.stop_timeout, supervisor_image: args.supervisor_image, + tls_ca: args.podman_tls_ca, + tls_cert: args.podman_tls_cert, + tls_key: args.podman_tls_key, }) .await .into_diagnostic()?; diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index f37ba472f..0191f2f60 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -505,6 +505,22 @@ async fn build_compute_runtime( .filter(|s| !s.is_empty()) .unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()); + // TLS client cert paths for sandbox mTLS. When all three are + // set, the Podman driver bind-mounts them into sandbox + // containers and switches the endpoint to https://. + let podman_tls_ca = std::env::var("OPENSHELL_PODMAN_TLS_CA") + .ok() + .filter(|s| !s.is_empty()) + .map(std::path::PathBuf::from); + let podman_tls_cert = std::env::var("OPENSHELL_PODMAN_TLS_CERT") + .ok() + .filter(|s| !s.is_empty()) + .map(std::path::PathBuf::from); + let podman_tls_key = std::env::var("OPENSHELL_PODMAN_TLS_KEY") + .ok() + .filter(|s| !s.is_empty()) + .map(std::path::PathBuf::from); + ComputeRuntime::new_podman( openshell_driver_podman::PodmanComputeConfig { socket_path, @@ -520,6 +536,9 @@ async fn build_compute_runtime( ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, stop_timeout_secs, supervisor_image, + tls_ca: podman_tls_ca, + tls_cert: podman_tls_cert, + tls_key: podman_tls_key, }, store, sandbox_index, diff --git a/deploy/rpm/GATEWAY-CONFIG.md b/deploy/rpm/GATEWAY-CONFIG.md new file mode 100644 index 000000000..75ba5ddb9 --- /dev/null +++ b/deploy/rpm/GATEWAY-CONFIG.md @@ -0,0 +1,201 @@ +# OpenShell Gateway Configuration (RPM) + +This document covers the configuration of the OpenShell gateway when 
+installed via the RPM package on Fedora and RHEL systems. + +## Quick start + +```shell +# Enable and start the gateway (rootless Podman, mTLS enabled): +systemctl --user enable --now openshell-gateway + +# Verify the gateway is running: +openshell sandbox list + +# Make the service persist across reboots without an active login: +sudo loginctl enable-linger $USER +``` + +On first start, the gateway auto-generates: + +- A self-signed PKI bundle (CA, server cert, client cert) for mTLS +- An SSH handshake secret for sandbox authentication + +No manual certificate setup is required. + +## TLS (mTLS) + +The RPM enables mutual TLS by default. The gateway requires a valid +client certificate for all API connections, protecting the API even +though it listens on all interfaces (`0.0.0.0`). + +### Auto-generated certificates + +On first start, the `init-pki.sh` script generates certificates using +OpenSSL: + +| File | Purpose | Location (user unit) | +|------|---------|---------------------| +| CA certificate | Root of trust | `~/.local/state/openshell/tls/ca.crt` | +| CA private key | Signs server and client certs | `~/.local/state/openshell/tls/ca.key` | +| Server certificate | Gateway TLS identity | `~/.local/state/openshell/tls/server/tls.crt` | +| Server private key | Gateway TLS key | `~/.local/state/openshell/tls/server/tls.key` | +| Client certificate | CLI and sandbox identity | `~/.local/state/openshell/tls/client/tls.crt` | +| Client private key | CLI and sandbox key | `~/.local/state/openshell/tls/client/tls.key` | + +Client certificates are also copied to the CLI auto-discovery directory: + +``` +~/.config/openshell/gateways/openshell/mtls/ + ca.crt + tls.crt + tls.key +``` + +The CLI automatically discovers these certificates when connecting to a +gateway on `localhost` or `127.0.0.1`. 
+ +### Server certificate SANs + +The auto-generated server certificate includes these Subject Alternative +Names: + +- `localhost` +- `openshell` +- `openshell.openshell.svc` +- `openshell.openshell.svc.cluster.local` +- `host.containers.internal` +- `host.docker.internal` +- `127.0.0.1` + +### Using externally-managed certificates + +To use certificates from an external CA or cert-manager: + +1. Place the server cert, key, and CA cert on the filesystem +1. Edit `/etc/sysconfig/openshell-gateway` (system unit) or use + `systemctl --user edit openshell-gateway` (user unit) to override: + +```shell +OPENSHELL_TLS_CERT=/path/to/server/tls.crt +OPENSHELL_TLS_KEY=/path/to/server/tls.key +OPENSHELL_TLS_CLIENT_CA=/path/to/ca.crt +``` + +1. Place the client cert where the CLI expects it: + +``` +~/.config/openshell/gateways/openshell/mtls/ + ca.crt + tls.crt + tls.key +``` + +### Rotating certificates + +Delete the TLS state directory and restart the gateway: + +```shell +rm -rf ~/.local/state/openshell/tls +systemctl --user restart openshell-gateway +``` + +The gateway regenerates the PKI on next start. + +### Disabling TLS + +To disable TLS (not recommended for production): + +1. Edit the sysconfig file or use a systemd override: + +```shell +OPENSHELL_DISABLE_TLS=true +``` + +1. Remove or comment out the `OPENSHELL_TLS_*` and + `OPENSHELL_PODMAN_TLS_*` variables. + +1. Restart the gateway. + +With TLS disabled, the gateway has no authentication. Any host that can +reach the gateway port has full access to the API. + +## Sandbox TLS + +When mTLS is enabled, the Podman driver bind-mounts the client +certificates into each sandbox container so the supervisor process can +establish an mTLS connection back to the gateway. 
+ +The following environment variables control the host-side paths of the +client certificates that are mounted into sandbox containers: + +| Variable | Description | +|----------|-------------| +| `OPENSHELL_PODMAN_TLS_CA` | CA certificate (host path) | +| `OPENSHELL_PODMAN_TLS_CERT` | Client certificate (host path) | +| `OPENSHELL_PODMAN_TLS_KEY` | Client private key (host path) | + +Inside the container, the supervisor reads them from: + +- `/etc/openshell/tls/client/ca.crt` +- `/etc/openshell/tls/client/tls.crt` +- `/etc/openshell/tls/client/tls.key` + +## Configuration reference + +All settings are controlled via environment variables. The system unit +reads from `/etc/sysconfig/openshell-gateway`. The user unit reads from +`~/.config/openshell/gateway.env` and systemd `Environment=` directives. + +### Gateway settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENSHELL_BIND_HOST` | `0.0.0.0` | IP address to bind all listeners to | +| `OPENSHELL_SERVER_PORT` | `8080` | Port for the gRPC/HTTP API | +| `OPENSHELL_DRIVERS` | `podman` | Compute driver (`podman`, `docker`, `kubernetes`) | +| `OPENSHELL_DB_URL` | (varies) | SQLite database URL for state persistence | +| `OPENSHELL_SSH_HANDSHAKE_SECRET` | (auto-generated) | Shared secret for sandbox SSH authentication | + +### TLS settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENSHELL_TLS_CERT` | (auto-generated path) | Server TLS certificate | +| `OPENSHELL_TLS_KEY` | (auto-generated path) | Server TLS private key | +| `OPENSHELL_TLS_CLIENT_CA` | (auto-generated path) | CA for client certificate verification | +| `OPENSHELL_DISABLE_TLS` | (unset) | Set to `true` to disable TLS | +| `OPENSHELL_PODMAN_TLS_CA` | (auto-generated path) | CA cert mounted into sandbox containers | +| `OPENSHELL_PODMAN_TLS_CERT` | (auto-generated path) | Client cert mounted into sandbox containers | +| `OPENSHELL_PODMAN_TLS_KEY` | (auto-generated path) | 
Client key mounted into sandbox containers | + +### Sandbox settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENSHELL_SUPERVISOR_IMAGE` | `ghcr.io/.../supervisor:latest` | Supervisor binary OCI image | +| `OPENSHELL_SANDBOX_IMAGE` | `ghcr.io/.../sandboxes/base:latest` | Default sandbox base image | + +## File locations + +### User unit (systemctl --user) + +| Purpose | Path | +|---------|------| +| Gateway binary | `/usr/bin/openshell-gateway` | +| CLI binary | `/usr/bin/openshell` | +| Systemd unit | `/usr/lib/systemd/user/openshell-gateway.service` | +| PKI bootstrap script | `/usr/libexec/openshell/init-pki.sh` | +| TLS certificates | `~/.local/state/openshell/tls/` | +| CLI client certs | `~/.config/openshell/gateways/openshell/mtls/` | +| Gateway database | `~/.local/state/openshell/gateway.db` | +| SSH handshake secret | `~/.config/openshell/gateway.env` | + +### System unit (systemctl) + +| Purpose | Path | +|---------|------| +| Systemd unit | `/usr/lib/systemd/system/openshell-gateway.service` | +| Configuration | `/etc/sysconfig/openshell-gateway` | +| TLS certificates | `/var/lib/openshell/tls/` | +| Gateway database | `/var/lib/openshell/gateway.db` | diff --git a/deploy/rpm/init-pki.sh b/deploy/rpm/init-pki.sh new file mode 100755 index 000000000..e26102d1a --- /dev/null +++ b/deploy/rpm/init-pki.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Generate a self-signed PKI bundle for the OpenShell gateway. +# +# Called from the systemd ExecStartPre directive to bootstrap mTLS on +# first start. Idempotent: exits immediately if certs already exist. 
+# +# Usage: +# init-pki.sh <pki-dir> +# +# Output layout: +# <pki-dir>/ca.crt CA certificate +# <pki-dir>/ca.key CA private key (mode 0600) +# <pki-dir>/server/tls.crt Server certificate +# <pki-dir>/server/tls.key Server private key (mode 0600) +# <pki-dir>/client/tls.crt Client certificate +# <pki-dir>/client/tls.key Client private key (mode 0600) +# +# Client certs are also copied to the CLI's auto-discovery directory: +# $XDG_CONFIG_HOME/openshell/gateways/openshell/mtls/{ca.crt,tls.crt,tls.key} + +set -euo pipefail + +PKI_DIR="${1:?Usage: init-pki.sh }" + +# ── Idempotent: skip if CA already exists ──────────────────────────── +if [ -f "${PKI_DIR}/ca.crt" ]; then + exit 0 +fi + +# ── Resolve CLI cert directory ─────────────────────────────────────── +CLI_MTLS_DIR="${XDG_CONFIG_HOME:-${HOME}/.config}/openshell/gateways/openshell/mtls" + +# ── Create directories ─────────────────────────────────────────────── +mkdir -p "${PKI_DIR}/server" "${PKI_DIR}/client" "${CLI_MTLS_DIR}" + +# ── Temporary workspace (cleaned up on exit) ───────────────────────── +TMPDIR=$(mktemp -d) +trap 'rm -rf "${TMPDIR}"' EXIT + +# ── Server certificate SANs ───────────────────────────────────────── +# These must match what the supervisor connects to. The CLI also +# connects using localhost/127.0.0.1 by default. 
+cat > "${TMPDIR}/server-san.cnf" <<'EOF' +[req] +distinguished_name = req_dn +req_extensions = v3_req +prompt = no + +[req_dn] +O = openshell +CN = openshell-server + +[v3_req] +subjectAltName = @alt_names + +[alt_names] +DNS.1 = localhost +DNS.2 = openshell +DNS.3 = openshell.openshell.svc +DNS.4 = openshell.openshell.svc.cluster.local +DNS.5 = host.containers.internal +DNS.6 = host.docker.internal +IP.1 = 127.0.0.1 +EOF + +# ── Generate CA ────────────────────────────────────────────────────── +openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "${PKI_DIR}/ca.key" \ + -out "${PKI_DIR}/ca.crt" \ + -days 3650 -nodes \ + -subj "/O=openshell/CN=openshell-ca" \ + 2>/dev/null +chmod 600 "${PKI_DIR}/ca.key" + +# ── Generate server certificate ────────────────────────────────────── +openssl req -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "${PKI_DIR}/server/tls.key" \ + -out "${TMPDIR}/server.csr" \ + -nodes \ + -config "${TMPDIR}/server-san.cnf" \ + 2>/dev/null + +openssl x509 -req \ + -in "${TMPDIR}/server.csr" \ + -CA "${PKI_DIR}/ca.crt" -CAkey "${PKI_DIR}/ca.key" -CAcreateserial \ + -out "${PKI_DIR}/server/tls.crt" \ + -days 3650 \ + -extensions v3_req \ + -extfile "${TMPDIR}/server-san.cnf" \ + 2>/dev/null +chmod 600 "${PKI_DIR}/server/tls.key" + +# ── Generate client certificate ────────────────────────────────────── +openssl req -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout "${PKI_DIR}/client/tls.key" \ + -out "${TMPDIR}/client.csr" \ + -nodes \ + -subj "/O=openshell/CN=openshell-client" \ + 2>/dev/null + +openssl x509 -req \ + -in "${TMPDIR}/client.csr" \ + -CA "${PKI_DIR}/ca.crt" -CAkey "${PKI_DIR}/ca.key" -CAcreateserial \ + -out "${PKI_DIR}/client/tls.crt" \ + -days 3650 \ + 2>/dev/null +chmod 600 "${PKI_DIR}/client/tls.key" + +# ── Copy client certs to CLI auto-discovery directory ──────────────── +# The CLI automatically looks for certs at: +# 
+# $XDG_CONFIG_HOME/openshell/gateways/<name>/mtls/{ca.crt,tls.crt,tls.key} +# For localhost gateways, defaults to "openshell". +cp "${PKI_DIR}/ca.crt" "${CLI_MTLS_DIR}/ca.crt" +cp "${PKI_DIR}/client/tls.crt" "${CLI_MTLS_DIR}/tls.crt" +cp "${PKI_DIR}/client/tls.key" "${CLI_MTLS_DIR}/tls.key" +chmod 600 "${CLI_MTLS_DIR}/tls.key" + +echo "PKI bootstrap complete: ${PKI_DIR}" diff --git a/openshell.spec b/openshell.spec index 88c2b1b40..124a5e0a8 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260428141722522502.rpm.27.g09c857c1%{?dist} +Release: 1.20260430135232393374.rpm.45.g17657c0b%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 @@ -52,6 +52,7 @@ LLM inference routing. %package gateway Summary: OpenShell gateway server with Podman sandbox driver Requires: podman +Requires: openssl Requires: %{name} = %{version}-%{release} %description gateway @@ -151,27 +152,37 @@ Requires=podman.service [Service] Type=exec -# Self-contained defaults for rootless operation. +# Self-contained defaults for rootless operation with mTLS. # -# WARNING: TLS is disabled. The gateway has NO authentication. -# It binds to localhost by default; if you change OPENSHELL_BIND_HOST -# to a non-loopback address, configure mTLS certificates and remove -# OPENSHELL_DISABLE_TLS. +# PKI is auto-generated on first start. Client certs are placed in +# ~/.config/openshell/gateways/openshell/mtls/ so the CLI discovers +# them automatically. See /usr/share/doc/openshell-gateway/ for details. # # The SSH handshake secret is auto-generated on first start into # ~/.config/openshell/gateway.env (mode 0600). To override, edit # that file or use: systemctl --user edit openshell-gateway.service +# Auto-generate PKI on first start if not present. +# %%S expands to $XDG_STATE_HOME (~/.local/state) in user units. 
+ExecStartPre=%{_libexecdir}/openshell/init-pki.sh %%S/openshell/tls + # Auto-generate SSH handshake secret on first start if not present. # %%E expands to $XDG_CONFIG_HOME (~/.config) in user units. ExecStartPre=/bin/sh -c 'ENV=%%E/openshell/gateway.env; [ -f "$ENV" ] || { mkdir -p %%E/openshell && echo "OPENSHELL_SSH_HANDSHAKE_SECRET=$(od -An -tx1 -N32 /dev/urandom | tr -dc 0-9a-f)" > "$ENV" && chmod 600 "$ENV"; }' EnvironmentFile=-%%E/openshell/gateway.env -Environment=OPENSHELL_BIND_HOST=127.0.0.1 +Environment=OPENSHELL_BIND_HOST=0.0.0.0 Environment=OPENSHELL_DRIVERS=podman Environment=OPENSHELL_DB_URL=sqlite://%%S/openshell/gateway.db Environment=OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest Environment=OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest -Environment=OPENSHELL_DISABLE_TLS=true +# mTLS: auto-generated certs in the state directory. +Environment=OPENSHELL_TLS_CERT=%%S/openshell/tls/server/tls.crt +Environment=OPENSHELL_TLS_KEY=%%S/openshell/tls/server/tls.key +Environment=OPENSHELL_TLS_CLIENT_CA=%%S/openshell/tls/ca.crt +# Podman driver: client certs bind-mounted into sandbox containers. +Environment=OPENSHELL_PODMAN_TLS_CA=%%S/openshell/tls/ca.crt +Environment=OPENSHELL_PODMAN_TLS_CERT=%%S/openshell/tls/client/tls.crt +Environment=OPENSHELL_PODMAN_TLS_KEY=%%S/openshell/tls/client/tls.key ExecStart=/usr/bin/openshell-gateway StateDirectory=openshell Restart=on-failure @@ -222,30 +233,44 @@ OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest # Default sandbox base image. OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest -# Bind the gateway to localhost only (single-host use case). -# Change to 0.0.0.0 if the gateway must accept connections from -# other hosts on the network. -OPENSHELL_BIND_HOST=127.0.0.1 - -# ---- SECURITY WARNING ---- -# TLS is disabled by default for ease of initial setup. 
With TLS -# disabled, the gateway has NO authentication. The default bind -# address is localhost (127.0.0.1), limiting access to the local -# machine. If you change OPENSHELL_BIND_HOST to a non-loopback -# address, any host that can reach the gateway port has full -# unauthenticated access to the API, including sandbox creation, -# command execution, and credential retrieval. +# Bind address. Default 0.0.0.0 listens on all interfaces; mTLS +# prevents unauthenticated access. +OPENSHELL_BIND_HOST=0.0.0.0 + +# ---- TLS (mTLS enabled by default) ---- +# A self-signed PKI is auto-generated on first start by init-pki.sh. +# Client certs are placed in ~/.config/openshell/gateways/openshell/mtls/ +# so the CLI discovers them automatically. # -# For any network-exposed deployment: -# 1. Generate mTLS certificates (see OpenShell docs) -# 2. Set OPENSHELL_TLS_CERT, OPENSHELL_TLS_KEY, OPENSHELL_TLS_CLIENT_CA -# 3. Comment out OPENSHELL_DISABLE_TLS below -OPENSHELL_DISABLE_TLS=true +# To use externally-managed certs, replace the paths below. +# To rotate certs, delete the TLS state directory and restart. +# To disable TLS (NOT RECOMMENDED), uncomment the line below and +# remove or comment out the OPENSHELL_TLS_* and OPENSHELL_PODMAN_TLS_* +# variables. +# OPENSHELL_DISABLE_TLS=true + +# Server TLS (gateway listens with these certs). +OPENSHELL_TLS_CERT=/var/lib/openshell/tls/server/tls.crt +OPENSHELL_TLS_KEY=/var/lib/openshell/tls/server/tls.key +OPENSHELL_TLS_CLIENT_CA=/var/lib/openshell/tls/ca.crt + +# Podman driver: client certs bind-mounted into sandbox containers. 
+OPENSHELL_PODMAN_TLS_CA=/var/lib/openshell/tls/ca.crt +OPENSHELL_PODMAN_TLS_CERT=/var/lib/openshell/tls/client/tls.crt +OPENSHELL_PODMAN_TLS_KEY=/var/lib/openshell/tls/client/tls.key EOF # --- Gateway state directory --- install -d %{buildroot}%{_sharedstatedir}/%{name} +# --- PKI bootstrap script --- +install -d %{buildroot}%{_libexecdir}/%{name} +install -pm 0755 deploy/rpm/init-pki.sh %{buildroot}%{_libexecdir}/%{name}/init-pki.sh + +# --- Gateway documentation --- +install -d %{buildroot}%{_docdir}/%{name}-gateway +install -pm 0644 deploy/rpm/GATEWAY-CONFIG.md %{buildroot}%{_docdir}/%{name}-gateway/GATEWAY-CONFIG.md + # --- Python SDK --- # Install Python SDK modules (test files are intentionally excluded) install -d %{buildroot}%{python3_sitelib}/%{name} @@ -315,9 +340,11 @@ fi %files gateway %license LICENSE +%doc %{_docdir}/%{name}-gateway/GATEWAY-CONFIG.md %{_bindir}/%{name}-gateway %{_unitdir}/%{name}-gateway.service %{_userunitdir}/%{name}-gateway.service +%{_libexecdir}/%{name}/init-pki.sh %attr(0640,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/%{name}-gateway %dir %{_sharedstatedir}/%{name} From 02bd2588341a7251e8fb8148b54f95ec233d5e1c Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 30 Apr 2026 14:37:58 -0500 Subject: [PATCH 10/18] refactor(podman): rename tls_ca/cert/key to guest_tls_ca/cert/key Align the Podman driver's TLS config field names with the Docker and VM drivers, which use guest_tls_ca/guest_tls_cert/guest_tls_key for client certificates injected into sandbox containers. 
--- crates/openshell-driver-podman/src/config.rs | 27 ++++++++++--------- .../openshell-driver-podman/src/container.rs | 14 +++++----- crates/openshell-driver-podman/src/driver.rs | 10 +++---- crates/openshell-driver-podman/src/main.rs | 6 ++--- crates/openshell-server/src/lib.rs | 6 ++--- openshell.spec | 2 +- 6 files changed, 34 insertions(+), 31 deletions(-) diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index 0c31e09b7..9798cc79f 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -104,21 +104,22 @@ pub struct PodmanComputeConfig { pub supervisor_image: String, /// Host path to the CA certificate for sandbox mTLS. /// - /// When all three TLS paths (`tls_ca`, `tls_cert`, `tls_key`) are set, - /// the driver bind-mounts them into sandbox containers and switches the - /// auto-detected endpoint from `http://` to `https://`. - pub tls_ca: Option, + /// When all three TLS paths (`guest_tls_ca`, `guest_tls_cert`, + /// `guest_tls_key`) are set, the driver bind-mounts them into sandbox + /// containers and switches the auto-detected endpoint from `http://` + /// to `https://`. + pub guest_tls_ca: Option, /// Host path to the client certificate for sandbox mTLS. - pub tls_cert: Option, + pub guest_tls_cert: Option, /// Host path to the client private key for sandbox mTLS. - pub tls_key: Option, + pub guest_tls_key: Option, } impl PodmanComputeConfig { /// Returns `true` when all three TLS paths are configured. #[must_use] pub fn tls_enabled(&self) -> bool { - self.tls_ca.is_some() && self.tls_cert.is_some() && self.tls_key.is_some() + self.guest_tls_ca.is_some() && self.guest_tls_cert.is_some() && self.guest_tls_key.is_some() } /// Resolve the default socket path from the environment. 
@@ -164,9 +165,9 @@ impl Default for PodmanComputeConfig { ssh_handshake_skew_secs: DEFAULT_SSH_HANDSHAKE_SKEW_SECS, stop_timeout_secs: DEFAULT_STOP_TIMEOUT_SECS, supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), - tls_ca: None, - tls_cert: None, - tls_key: None, + guest_tls_ca: None, + guest_tls_cert: None, + guest_tls_key: None, } } } @@ -187,9 +188,9 @@ impl std::fmt::Debug for PodmanComputeConfig { .field("ssh_handshake_skew_secs", &self.ssh_handshake_skew_secs) .field("stop_timeout_secs", &self.stop_timeout_secs) .field("supervisor_image", &self.supervisor_image) - .field("tls_ca", &self.tls_ca) - .field("tls_cert", &self.tls_cert) - .field("tls_key", &self.tls_key) + .field("guest_tls_ca", &self.guest_tls_ca) + .field("guest_tls_cert", &self.guest_tls_cert) + .field("guest_tls_key", &self.guest_tls_key) .finish() } } diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index fd9f30a53..ca1fd329c 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -488,9 +488,11 @@ pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi // OPENSHELL_TLS_CERT, and OPENSHELL_TLS_KEY env vars (set in // build_env above) to establish an mTLS connection back to the // gateway. 
- if let (Some(ca), Some(cert), Some(key)) = - (&config.tls_ca, &config.tls_cert, &config.tls_key) - { + if let (Some(ca), Some(cert), Some(key)) = ( + &config.guest_tls_ca, + &config.guest_tls_cert, + &config.guest_tls_key, + ) { let ro = vec!["ro".into(), "rbind".into()]; m.push(Mount { kind: "bind".into(), @@ -884,9 +886,9 @@ mod tests { fn container_spec_includes_tls_mounts_when_configured() { let sandbox = test_sandbox("tls-id", "tls-name"); let mut config = test_config(); - config.tls_ca = Some(std::path::PathBuf::from("/host/ca.crt")); - config.tls_cert = Some(std::path::PathBuf::from("/host/tls.crt")); - config.tls_key = Some(std::path::PathBuf::from("/host/tls.key")); + config.guest_tls_ca = Some(std::path::PathBuf::from("/host/ca.crt")); + config.guest_tls_cert = Some(std::path::PathBuf::from("/host/tls.crt")); + config.guest_tls_key = Some(std::path::PathBuf::from("/host/tls.key")); let spec = build_container_spec(&sandbox, &config); diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index 3dc583545..4e3f6b2d4 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -589,9 +589,9 @@ mod tests { fn grpc_endpoint_https_with_tls() { let mut cfg = PodmanComputeConfig { gateway_port: 8080, - tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), - tls_cert: Some(std::path::PathBuf::from("/tls/tls.crt")), - tls_key: Some(std::path::PathBuf::from("/tls/tls.key")), + guest_tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), + guest_tls_cert: Some(std::path::PathBuf::from("/tls/tls.crt")), + guest_tls_key: Some(std::path::PathBuf::from("/tls/tls.key")), ..PodmanComputeConfig::default() }; if cfg.grpc_endpoint.is_empty() { @@ -605,8 +605,8 @@ mod tests { fn grpc_endpoint_partial_tls_falls_back_to_http() { let mut cfg = PodmanComputeConfig { gateway_port: 8080, - tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), - // tls_cert and tls_key not set — 
incomplete TLS config. + guest_tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), + // guest_tls_cert and guest_tls_key not set — incomplete TLS config. ..PodmanComputeConfig::default() }; assert!(!cfg.tls_enabled()); diff --git a/crates/openshell-driver-podman/src/main.rs b/crates/openshell-driver-podman/src/main.rs index c8845e524..25cb5912f 100644 --- a/crates/openshell-driver-podman/src/main.rs +++ b/crates/openshell-driver-podman/src/main.rs @@ -127,9 +127,9 @@ async fn main() -> Result<()> { ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, stop_timeout_secs: args.stop_timeout, supervisor_image: args.supervisor_image, - tls_ca: args.podman_tls_ca, - tls_cert: args.podman_tls_cert, - tls_key: args.podman_tls_key, + guest_tls_ca: args.podman_tls_ca, + guest_tls_cert: args.podman_tls_cert, + guest_tls_key: args.podman_tls_key, }) .await .into_diagnostic()?; diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 0191f2f60..cad79eb56 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -536,9 +536,9 @@ async fn build_compute_runtime( ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, stop_timeout_secs, supervisor_image, - tls_ca: podman_tls_ca, - tls_cert: podman_tls_cert, - tls_key: podman_tls_key, + guest_tls_ca: podman_tls_ca, + guest_tls_cert: podman_tls_cert, + guest_tls_key: podman_tls_key, }, store, sandbox_index, diff --git a/openshell.spec b/openshell.spec index 124a5e0a8..dff4adb43 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260430135232393374.rpm.45.g17657c0b%{?dist} +Release: 1.20260430141223488997.rpm.46.g7ff80c76%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 From 5fb3a9b5d9c548c3cd1b76a34f0ad6bfd1395bbd Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 30 Apr 2026 15:11:13 -0500 Subject: [PATCH 11/18] fix(cli): skip cert extraction when TLS certs 
already exist on disk gateway add --local tries to extract TLS certificates from a Docker container, which fails on RPM/systemd deployments where the gateway runs as a native service (not in a container). When init-pki.sh has already provisioned client certs to the CLI auto-discovery directory, skip the Docker-based extraction and use the existing certs. --- crates/openshell-cli/src/run.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 37488ba5f..90655122a 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -1168,10 +1168,27 @@ pub async fn gateway_add( // is not registered. Pass the endpoint port so the container can be // identified by its host port binding when multiple gateways run on // the same Docker host. - let endpoint_port = url::Url::parse(&endpoint).ok().and_then(|u| u.port()); - eprintln!("• Extracting TLS certificates from gateway container..."); - openshell_bootstrap::extract_and_store_pki(name, remote_opts.as_ref(), endpoint_port) - .await?; + // + // Skip extraction when client certs are already on disk (e.g., + // RPM/systemd deployments where init-pki.sh pre-provisions them + // before the gateway starts). 
+ let certs_on_disk = openshell_core::paths::xdg_config_dir() + .map(|d| { + let mtls = d.join("openshell").join("gateways").join(name).join("mtls"); + mtls.join("ca.crt").is_file() + && mtls.join("tls.crt").is_file() + && mtls.join("tls.key").is_file() + }) + .unwrap_or(false); + + if certs_on_disk { + eprintln!("• TLS certificates already present, skipping extraction"); + } else { + let endpoint_port = url::Url::parse(&endpoint).ok().and_then(|u| u.port()); + eprintln!("• Extracting TLS certificates from gateway container..."); + openshell_bootstrap::extract_and_store_pki(name, remote_opts.as_ref(), endpoint_port) + .await?; + } let (remote_host, resolved_host) = remote.map_or((None, None), |dest| { let ssh_host = extract_host_from_ssh_destination(dest); From af13b9ab18c77c2fcc37d15b73ebfcbdfe21f0e6 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 30 Apr 2026 15:42:19 -0500 Subject: [PATCH 12/18] fix(cli): warn on http:// registration when mTLS certs exist and validate connectivity gateway add previously stored metadata without verifying the gateway was reachable, and silently accepted http:// endpoints even when mTLS client certs were already on disk (e.g., from RPM init-pki.sh). Add two validations to gateway_add: 1. When registering an http:// endpoint and mTLS certs exist for the gateway name, warn the user and suggest the https:// equivalent (with --local for loopback endpoints). 2. After storing metadata, perform a non-fatal health check against the gateway. If unreachable, print a warning. This catches scheme mismatches, wrong ports, and unreachable hosts at registration time rather than at first sandbox create. 
--- crates/openshell-cli/src/run.rs | 62 ++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 90655122a..68357a723 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -1135,6 +1135,31 @@ pub async fn gateway_add( } if endpoint.starts_with("http://") { + // Warn if mTLS certs exist for this gateway — the user likely + // meant to use https:// instead of http://. + let has_mtls_certs = openshell_core::paths::xdg_config_dir() + .map(|d| { + let mtls = d.join("openshell").join("gateways").join(name).join("mtls"); + mtls.join("ca.crt").is_file() + && mtls.join("tls.crt").is_file() + && mtls.join("tls.key").is_file() + }) + .unwrap_or(false); + + if has_mtls_certs { + let https_endpoint = endpoint.replacen("http://", "https://", 1); + let suggestion = if is_loopback_gateway_endpoint(&endpoint) { + format!("openshell gateway add --local {https_endpoint}") + } else { + format!("openshell gateway add {https_endpoint}") + }; + eprintln!( + "{} mTLS certificates found for gateway '{name}'. Did you mean to use https?", + "⚠".yellow().bold(), + ); + eprintln!(" Try: {suggestion}"); + } + let metadata = plaintext_gateway_metadata(name, &endpoint, remote, local); let gateway_type = gateway_type_label(&metadata); let gateway_auth = gateway_auth_label(&metadata); @@ -1142,6 +1167,21 @@ pub async fn gateway_add( store_gateway_metadata(name, &metadata)?; save_active_gateway(name)?; + // Verify the gateway is reachable. 
+ let tls = TlsOptions::default(); + match http_health_check(&endpoint, &tls).await { + Ok(Some(status)) if status.is_success() => {} + _ => { + eprintln!( + "{} Gateway is not reachable at {endpoint}", + "⚠".yellow().bold(), + ); + if !has_mtls_certs { + eprintln!(" Verify the gateway is running and the endpoint is correct."); + } + } + } + eprintln!( "{} Gateway '{}' added and set as active", "✓".green().bold(), @@ -1211,6 +1251,18 @@ pub async fn gateway_add( store_gateway_metadata(name, &metadata)?; save_active_gateway(name)?; + // Verify the gateway is reachable over mTLS. + let tls = TlsOptions::default().with_gateway_name(name); + match http_health_check(&endpoint, &tls).await { + Ok(Some(status)) if status.is_success() => {} + _ => { + eprintln!( + "{} Gateway is not reachable at {endpoint}. Verify the gateway is running.", + "⚠".yellow().bold(), + ); + } + } + eprintln!( "{} Gateway '{}' added and set as active", "✓".green().bold(), @@ -1222,7 +1274,15 @@ pub async fn gateway_add( "Type:".dimmed(), if local { "local" } else { "remote" }, ); - eprintln!("{} TLS certificates extracted", "✓".green().bold()); + eprintln!( + "{} TLS certificates {}", + "✓".green().bold(), + if certs_on_disk { + "already present" + } else { + "extracted" + } + ); } else { // Cloud (edge-authenticated) gateway. let metadata = GatewayMetadata { From d46c5b74b2a00e6a355d64fd15715b7f0cb26e3f Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 30 Apr 2026 16:43:37 -0500 Subject: [PATCH 13/18] fix(cli): resolve loopback gateway name for mTLS cert detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cert-on-disk checks in gateway_add used the raw URL-derived hostname (e.g., 'localhost') to look up client certs, but init-pki.sh writes them under the 'openshell' gateway name. The TLS resolver in tls.rs maps localhost/127.0.0.1 to 'openshell' — the cert detection was not applying the same mapping. 
Extract mtls_certs_exist_for_endpoint() which applies the loopback -> 'openshell' name mapping, replacing two identical inline cert checks. This fixes the mTLS warning not firing for http://localhost:8080 and the cert extraction skip not working for https://localhost:8080. --- crates/openshell-cli/src/run.rs | 44 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 68357a723..8b795ff65 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -865,6 +865,32 @@ fn is_loopback_gateway_endpoint(endpoint: &str) -> bool { } } +/// Check whether mTLS client certs exist on disk for the gateway that +/// would serve this endpoint. +/// +/// Loopback endpoints (`localhost`, `127.0.0.1`, `::1`) resolve to the +/// `"openshell"` gateway name, matching the convention used by +/// `init-pki.sh` and the TLS cert resolver in `tls.rs`. +fn mtls_certs_exist_for_endpoint(name: &str, endpoint: &str) -> bool { + let cert_name = if is_loopback_gateway_endpoint(endpoint) { + "openshell" + } else { + name + }; + openshell_core::paths::xdg_config_dir() + .map(|d| { + let mtls = d + .join("openshell") + .join("gateways") + .join(cert_name) + .join("mtls"); + mtls.join("ca.crt").is_file() + && mtls.join("tls.crt").is_file() + && mtls.join("tls.key").is_file() + }) + .unwrap_or(false) +} + fn plaintext_gateway_is_remote(endpoint: &str, remote: Option<&str>, local: bool) -> bool { if local { return false; @@ -1137,14 +1163,7 @@ pub async fn gateway_add( if endpoint.starts_with("http://") { // Warn if mTLS certs exist for this gateway — the user likely // meant to use https:// instead of http://. 
- let has_mtls_certs = openshell_core::paths::xdg_config_dir() - .map(|d| { - let mtls = d.join("openshell").join("gateways").join(name).join("mtls"); - mtls.join("ca.crt").is_file() - && mtls.join("tls.crt").is_file() - && mtls.join("tls.key").is_file() - }) - .unwrap_or(false); + let has_mtls_certs = mtls_certs_exist_for_endpoint(name, &endpoint); if has_mtls_certs { let https_endpoint = endpoint.replacen("http://", "https://", 1); @@ -1212,14 +1231,7 @@ pub async fn gateway_add( // Skip extraction when client certs are already on disk (e.g., // RPM/systemd deployments where init-pki.sh pre-provisions them // before the gateway starts). - let certs_on_disk = openshell_core::paths::xdg_config_dir() - .map(|d| { - let mtls = d.join("openshell").join("gateways").join(name).join("mtls"); - mtls.join("ca.crt").is_file() - && mtls.join("tls.crt").is_file() - && mtls.join("tls.key").is_file() - }) - .unwrap_or(false); + let certs_on_disk = mtls_certs_exist_for_endpoint(name, &endpoint); if certs_on_disk { eprintln!("• TLS certificates already present, skipping extraction"); From c3ae4b90324c99c0b70eff1308b6de039d60210b Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 1 May 2026 09:25:53 -0500 Subject: [PATCH 14/18] fix(podman): resolve mTLS cert path mismatch and SELinux bind-mount denial Two fixes for RPM/systemd gateway deployments on Fedora/RHEL: 1. Gateway name derivation: loopback endpoints (localhost, 127.0.0.1, ::1) now derive the canonical 'openshell' gateway name when no --name is provided. This matches the convention used by init-pki.sh, default_tls_dir, mtls_certs_exist_for_endpoint, and bootstrap. Previously the raw hostname was used (e.g. 'localhost'), causing tls_dir_for_gateway to look in gateways/localhost/mtls/ while init-pki.sh placed certs in gateways/openshell/mtls/. 2. SELinux bind-mount relabeling: TLS cert bind-mounts now include the 'z' (shared relabel) option when SELinux is enabled. 
Without this, SELinux MAC policy denies the container process access to the bind-mounted cert files, causing 'failed to read CA cert' errors on Fedora/RHEL where SELinux is enforcing by default. Detection checks /sys/fs/selinux presence to cover both enforcing and permissive modes, matching Podman's own behavior. --- crates/openshell-cli/src/run.rs | 11 ++++- .../openshell-driver-podman/src/container.rs | 43 ++++++++++++++++++- crates/openshell-driver-podman/src/driver.rs | 8 ++-- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 8b795ff65..30cc36e86 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -1051,9 +1051,14 @@ pub async fn gateway_add( } // Derive a gateway name from the hostname when none is provided. + // Loopback endpoints use the canonical "openshell" name, matching the + // convention in init-pki.sh, default_tls_dir, and bootstrap. let derived_name; let name = if let Some(n) = name { n + } else if is_loopback_gateway_endpoint(&endpoint) { + derived_name = "openshell".to_string(); + &derived_name } else { // Parse out just the host portion of the URL. derived_name = url::Url::parse(&endpoint) @@ -6425,12 +6430,14 @@ mod tests { .expect("register plaintext gateway"); }); - let metadata = load_gateway_metadata("127.0.0.1").expect("load stored gateway"); + // Loopback endpoints derive the canonical "openshell" gateway + // name, matching init-pki.sh and default_tls_dir conventions. 
+ let metadata = load_gateway_metadata("openshell").expect("load stored gateway"); assert_eq!(metadata.auth_mode.as_deref(), Some("plaintext")); assert!(!metadata.is_remote); assert_eq!(metadata.client_lifecycle_managed, Some(false)); assert_eq!(metadata.gateway_endpoint, "http://127.0.0.1:8080"); - assert_eq!(load_active_gateway().as_deref(), Some("127.0.0.1")); + assert_eq!(load_active_gateway().as_deref(), Some("openshell")); }); } diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index ca1fd329c..2600a53e8 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -10,6 +10,25 @@ use serde::Serialize; use serde_json::Value; use std::collections::BTreeMap; +/// Returns `true` when `SELinux` is enabled (enforcing or permissive). +/// +/// Checks whether selinuxfs is mounted, matching Podman's own detection +/// logic. Bind-mount relabeling (the `z` mount option) is needed in both +/// enforcing and permissive modes: enforcing blocks access outright, while +/// permissive floods the audit log with AVC denials that mask real issues. +/// +/// On non-`SELinux` systems (Ubuntu, macOS, Alpine) the directory does not +/// exist and this returns `false`, leaving mount options unchanged. +#[cfg(target_os = "linux")] +fn is_selinux_enabled() -> bool { + std::path::Path::new("/sys/fs/selinux").is_dir() +} + +#[cfg(not(target_os = "linux"))] +fn is_selinux_enabled() -> bool { + false +} + /// Label key for the sandbox ID. pub const LABEL_SANDBOX_ID: &str = "openshell.sandbox-id"; /// Label key for the sandbox name. 
@@ -493,7 +512,13 @@ pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi &config.guest_tls_cert, &config.guest_tls_key, ) { - let ro = vec!["ro".into(), "rbind".into()]; + let mut ro = vec!["ro".into(), "rbind".into()]; + // On SELinux-enabled systems (Fedora, RHEL), bind-mounted + // files need the shared relabel option so the container + // process can read them through the SELinux MAC policy. + if is_selinux_enabled() { + ro.push("z".into()); + } m.push(Mount { kind: "bind".into(), source: ca.display().to_string(), @@ -928,6 +953,22 @@ mod tests { bind_dests.contains(&"/etc/openshell/tls/client/tls.key"), "should bind-mount client key" ); + + // Verify SELinux relabel option is present iff SELinux is enabled. + let tls_binds: Vec<&Value> = mounts + .iter() + .filter(|m| m["type"].as_str() == Some("bind")) + .collect(); + let has_z = tls_binds.iter().all(|m| { + m["options"] + .as_array() + .is_some_and(|opts| opts.iter().any(|o| o.as_str() == Some("z"))) + }); + assert_eq!( + has_z, + is_selinux_enabled(), + "TLS bind mounts should include 'z' option iff SELinux is enabled" + ); } #[test] diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index 4e3f6b2d4..523c046b4 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -589,9 +589,9 @@ mod tests { fn grpc_endpoint_https_with_tls() { let mut cfg = PodmanComputeConfig { gateway_port: 8080, - guest_tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), - guest_tls_cert: Some(std::path::PathBuf::from("/tls/tls.crt")), - guest_tls_key: Some(std::path::PathBuf::from("/tls/tls.key")), + guest_tls_ca: Some(PathBuf::from("/tls/ca.crt")), + guest_tls_cert: Some(PathBuf::from("/tls/tls.crt")), + guest_tls_key: Some(PathBuf::from("/tls/tls.key")), ..PodmanComputeConfig::default() }; if cfg.grpc_endpoint.is_empty() { @@ -605,7 +605,7 @@ mod tests { fn 
grpc_endpoint_partial_tls_falls_back_to_http() { let mut cfg = PodmanComputeConfig { gateway_port: 8080, - guest_tls_ca: Some(std::path::PathBuf::from("/tls/ca.crt")), + guest_tls_ca: Some(PathBuf::from("/tls/ca.crt")), // guest_tls_cert and guest_tls_key not set — incomplete TLS config. ..PodmanComputeConfig::default() }; From fec4ccb750fe006933daef2b2ffb934ee95b271a Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 1 May 2026 13:51:03 -0500 Subject: [PATCH 15/18] feat(rpm): overhaul packaging and documentation for Podman/systemd deployment Remove the system unit and /etc/sysconfig file -- the RPM now ships only the systemd user unit for rootless Podman operation. Replace the single GATEWAY-CONFIG.md with three focused guides (QUICKSTART, CONFIGURATION, TROUBLESHOOTING) covering prerequisites, gateway registration, provider setup, CLI compatibility, remote access, air-gap deployment, and upgrade procedures. Add init-gateway-env.sh to generate a well-commented gateway.env with an auto-generated SSH handshake secret on first start, replacing the inline ExecStartPre one-liner. Fix the systemd unit dependency from podman.service to podman.socket for proper socket activation. Add man pages for openshell(1), openshell-gateway(8), and openshell-gateway.env(5) built from pandoc markdown sources in deploy/man/, shared across packaging formats. 
--- .markdownlint-cli2.jsonc | 6 +- architecture/podman-driver.md | 2 +- architecture/podman-rootless-networking.md | 2 +- deploy/man/openshell-gateway.8.md | 207 ++++++++++++++++ deploy/man/openshell-gateway.env.5.md | 161 ++++++++++++ deploy/man/openshell.1.md | 217 +++++++++++++++++ .../{GATEWAY-CONFIG.md => CONFIGURATION.md} | 170 ++++++++----- deploy/rpm/QUICKSTART.md | 158 ++++++++++++ deploy/rpm/TROUBLESHOOTING.md | 230 ++++++++++++++++++ deploy/rpm/init-gateway-env.sh | 113 +++++++++ openshell.spec | 157 +++--------- 11 files changed, 1237 insertions(+), 186 deletions(-) create mode 100644 deploy/man/openshell-gateway.8.md create mode 100644 deploy/man/openshell-gateway.env.5.md create mode 100644 deploy/man/openshell.1.md rename deploy/rpm/{GATEWAY-CONFIG.md => CONFIGURATION.md} (50%) create mode 100644 deploy/rpm/QUICKSTART.md create mode 100644 deploy/rpm/TROUBLESHOOTING.md create mode 100644 deploy/rpm/init-gateway-env.sh diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc index 3f340fbc2..4c7f68e5a 100644 --- a/.markdownlint-cli2.jsonc +++ b/.markdownlint-cli2.jsonc @@ -10,7 +10,11 @@ ".opencode/**", ".github/**", "THIRD-PARTY-NOTICES/**", - "CLAUDE.md" + "CLAUDE.md", + // Man page sources use pandoc markdown with multiple H1 sections + // (NAME, SYNOPSIS, DESCRIPTION, etc.) which is standard for man + // pages but violates MD025. + "deploy/man/**" ], "config": { "default": true, diff --git a/architecture/podman-driver.md b/architecture/podman-driver.md index c6594f76a..67b86861d 100644 --- a/architecture/podman-driver.md +++ b/architecture/podman-driver.md @@ -112,7 +112,7 @@ When the Podman driver's TLS configuration is set (`tls_ca`, `tls_cert`, `tls_ke The supervisor reads these env vars and uses them to establish an mTLS connection back to the gateway. -The RPM packaging auto-generates a self-signed PKI on first start via `init-pki.sh`. 
Client certs are placed in the CLI auto-discovery directory (`~/.config/openshell/gateways/openshell/mtls/`) so the CLI connects with mTLS without manual configuration. See `deploy/rpm/GATEWAY-CONFIG.md` for the full RPM configuration reference. +The RPM packaging auto-generates a self-signed PKI on first start via `init-pki.sh`. Client certs are placed in the CLI auto-discovery directory (`~/.config/openshell/gateways/openshell/mtls/`) so the CLI connects with mTLS without manual configuration. See `deploy/rpm/CONFIGURATION.md` for the full RPM configuration reference and `deploy/rpm/QUICKSTART.md` for the quick start guide. ## Network Model diff --git a/architecture/podman-rootless-networking.md b/architecture/podman-rootless-networking.md index 5f35b3ba2..54ff882f5 100644 --- a/architecture/podman-rootless-networking.md +++ b/architecture/podman-rootless-networking.md @@ -339,7 +339,7 @@ The gateway binds to `0.0.0.0` by default in the RPM packaging. mTLS prevents unauthenticated access even though the gateway is reachable from the network. Client certificates are auto-generated by `init-pki.sh` on first start and bind-mounted into sandbox containers -by the Podman driver. See `deploy/rpm/GATEWAY-CONFIG.md` for the full +by the Podman driver. See `deploy/rpm/CONFIGURATION.md` for the full configuration reference. ## Differences from the Kubernetes Driver diff --git a/deploy/man/openshell-gateway.8.md b/deploy/man/openshell-gateway.8.md new file mode 100644 index 000000000..f551e0c11 --- /dev/null +++ b/deploy/man/openshell-gateway.8.md @@ -0,0 +1,207 @@ +--- +title: OPENSHELL-GATEWAY +section: 8 +header: OpenShell Manual +footer: openshell-gateway +date: 2025 +--- + +# NAME + +openshell-gateway - OpenShell gateway server daemon + +# SYNOPSIS + +**openshell-gateway** \[*OPTIONS*\] + +# DESCRIPTION + +**openshell-gateway** is the control-plane server for OpenShell. 
It +manages sandbox lifecycle, stores provider credentials, delivers +network and filesystem policies to sandboxes, routes inference +requests, and provides the SSH tunnel endpoint for CLI-to-sandbox +connections. + +When installed via RPM, the gateway runs as a systemd user service +with the Podman compute driver. Sandboxes are rootless Podman +containers on the host. + +The gateway exposes a single port (default 8080) with multiplexed +gRPC and HTTP, secured by mutual TLS (mTLS) by default. + +# OPTIONS + +**--host** *IP* +: IP address to bind all listeners to. Default: **0.0.0.0**. + Environment: **OPENSHELL_BIND_HOST**. + +**--port** *PORT* +: Port for the gRPC/HTTP API. Default: **8080**. + Environment: **OPENSHELL_SERVER_PORT**. + +**--health-port** *PORT* +: Port for unauthenticated health endpoints (/healthz, /readyz). + Set to 0 to disable. Default: **0**. + Environment: **OPENSHELL_HEALTH_PORT**. + +**--metrics-port** *PORT* +: Port for Prometheus metrics (/metrics). Set to 0 to disable. + Default: **0**. Environment: **OPENSHELL_METRICS_PORT**. + +**--log-level** *LEVEL* +: Log level: trace, debug, info, warn, error. Default: **info**. + Environment: **OPENSHELL_LOG_LEVEL**. + +**--db-url** *URL* +: SQLite database URL for state persistence. Required. + Environment: **OPENSHELL_DB_URL**. + +**--drivers** *DRIVER*\[,*DRIVER*\] +: Compute driver. Accepts a comma-delimited list. The gateway + currently requires exactly one driver. Options: **podman**, + **docker**, **kubernetes**. Default: **kubernetes**. + Environment: **OPENSHELL_DRIVERS**. + +**--tls-cert** *PATH* +: Path to server TLS certificate file. Required unless + **--disable-tls** is set. Environment: **OPENSHELL_TLS_CERT**. + +**--tls-key** *PATH* +: Path to server TLS private key file. Required unless + **--disable-tls** is set. Environment: **OPENSHELL_TLS_KEY**. + +**--tls-client-ca** *PATH* +: Path to CA certificate for client certificate verification (mTLS). 
+ Required unless **--disable-tls** is set. + Environment: **OPENSHELL_TLS_CLIENT_CA**. + +**--disable-tls** +: Disable TLS entirely and listen on plaintext HTTP. Use when the + gateway sits behind a TLS-terminating reverse proxy. + Environment: **OPENSHELL_DISABLE_TLS**. + +**--disable-gateway-auth** +: Disable mTLS client certificate requirement. The TLS handshake + accepts connections without a client certificate. Ignored when + **--disable-tls** is set. + Environment: **OPENSHELL_DISABLE_GATEWAY_AUTH**. + +**--sandbox-image** *IMAGE* +: Default container image for sandboxes. + Environment: **OPENSHELL_SANDBOX_IMAGE**. + +**--sandbox-image-pull-policy** *POLICY* +: Image pull policy: Always, IfNotPresent, Never. + Environment: **OPENSHELL_SANDBOX_IMAGE_PULL_POLICY**. + +**--ssh-handshake-secret** *SECRET* +: Shared secret for gateway-to-sandbox SSH handshake. + Environment: **OPENSHELL_SSH_HANDSHAKE_SECRET**. + +**--ssh-handshake-skew-secs** *SECONDS* +: Allowed clock skew in seconds for SSH handshake. Default: **30**. + Environment: **OPENSHELL_SSH_HANDSHAKE_SKEW_SECS**. + +**--ssh-gateway-host** *HOST* +: Public host for the SSH gateway endpoint. Default: **127.0.0.1**. + Environment: **OPENSHELL_SSH_GATEWAY_HOST**. + +**--ssh-gateway-port** *PORT* +: Public port for the SSH gateway endpoint. Default: **8080**. + Environment: **OPENSHELL_SSH_GATEWAY_PORT**. + +**--grpc-endpoint** *URL* +: gRPC endpoint for sandbox callbacks. Should be reachable from + within sandbox containers. + Environment: **OPENSHELL_GRPC_ENDPOINT**. + +# SYSTEMD INTEGRATION + +The RPM installs a systemd user unit at +*/usr/lib/systemd/user/openshell-gateway.service*. 
Manage the gateway +with standard systemd commands: + + systemctl --user enable --now openshell-gateway + systemctl --user status openshell-gateway + systemctl --user restart openshell-gateway + systemctl --user stop openshell-gateway + +View logs: + + journalctl --user -u openshell-gateway + journalctl --user -u openshell-gateway -f + +The unit runs two **ExecStartPre** scripts before every start: + +1. **init-pki.sh** generates a self-signed PKI bundle for mTLS. +2. **init-gateway-env.sh** generates the environment configuration + file with an auto-generated SSH handshake secret. + +Both scripts are idempotent and skip generation if their output files +already exist. + +To persist the service across logouts: + + sudo loginctl enable-linger $USER + +# CONFIGURATION + +The systemd user unit reads configuration from +*~/.config/openshell/gateway.env*. See **openshell-gateway.env**(5) +for the full variable reference. + +To override individual settings without modifying gateway.env: + + systemctl --user edit openshell-gateway + +This creates a drop-in override that persists across package upgrades. + +# FILES + +*/usr/bin/openshell-gateway* +: Gateway binary. + +*/usr/lib/systemd/user/openshell-gateway.service* +: Systemd user unit file. + +*/usr/libexec/openshell/init-pki.sh* +: PKI bootstrap script. + +*/usr/libexec/openshell/init-gateway-env.sh* +: Gateway environment file generator. + +*~/.config/openshell/gateway.env* +: Gateway environment configuration (generated on first start). + +*~/.local/state/openshell/tls/* +: Auto-generated TLS certificates. + +*~/.local/state/openshell/gateway.db* +: SQLite database for gateway state. + +*~/.config/openshell/gateways/openshell/mtls/* +: Client mTLS certificates for CLI auto-discovery. 
+ +# EXAMPLES + +Start the gateway as a systemd user service: + + systemctl --user enable --now openshell-gateway + +Check gateway health from the CLI: + + openshell gateway add --local https://127.0.0.1:8080 + openshell status + +Override the API port via a systemd drop-in: + + systemctl --user edit openshell-gateway + # Add: [Service] + # Add: Environment=OPENSHELL_SERVER_PORT=9090 + +# SEE ALSO + +**openshell**(1), **openshell-gateway.env**(5), **systemctl**(1), +**journalctl**(1), **loginctl**(1), **podman**(1) + +Full documentation: *https://docs.nvidia.com/openshell/* diff --git a/deploy/man/openshell-gateway.env.5.md b/deploy/man/openshell-gateway.env.5.md new file mode 100644 index 000000000..7e6da7cb6 --- /dev/null +++ b/deploy/man/openshell-gateway.env.5.md @@ -0,0 +1,161 @@ +--- +title: OPENSHELL-GATEWAY.ENV +section: 5 +header: OpenShell Manual +footer: openshell-gateway +date: 2025 +--- + +# NAME + +openshell-gateway.env - OpenShell gateway environment configuration + +# DESCRIPTION + +The **openshell-gateway.env** file contains environment variables that +configure the OpenShell gateway server when running as a systemd user +service. It is generated automatically on first start by +**init-gateway-env.sh** and is not overwritten on subsequent starts or +package upgrades. + +The file uses the standard systemd **EnvironmentFile** format: one +**KEY=VALUE** pair per line. Lines beginning with **#** are comments. +Shell variable expansion is not performed. + +# LOCATION + +The file is located at: + + ~/.config/openshell/gateway.env + +The systemd user unit reads it via: + + EnvironmentFile=-~/.config/openshell/gateway.env + +The **-** prefix means the service starts normally if the file does not +exist (the unit has built-in defaults for all required settings except +the SSH handshake secret). + +# VARIABLES + +## Required + +**OPENSHELL_SSH_HANDSHAKE_SECRET** +: Shared HMAC secret for gateway-to-sandbox SSH handshake + authentication. 
Auto-generated on first start as 32 random bytes in + hex (64 characters). To regenerate: **openssl rand -hex 32**. + +## Gateway + +**OPENSHELL_BIND_HOST** (default: 0.0.0.0) +: IP address to bind all listeners to. + +**OPENSHELL_SERVER_PORT** (default: 8080) +: Port for the multiplexed gRPC/HTTP API. + +**OPENSHELL_HEALTH_PORT** (default: 0) +: Port for unauthenticated health endpoints (/healthz, /readyz). + Set to a non-zero value to enable a dedicated health listener. + +**OPENSHELL_METRICS_PORT** (default: 0) +: Port for Prometheus metrics endpoint (/metrics). Set to a + non-zero value to enable a dedicated metrics listener. + +**OPENSHELL_LOG_LEVEL** (default: info) +: Log verbosity: **trace**, **debug**, **info**, **warn**, **error**. + +**OPENSHELL_DRIVERS** (default: podman) +: Compute driver for sandbox management. Options: **podman**, + **docker**, **kubernetes**. The RPM unit defaults to **podman**. + +**OPENSHELL_DB_URL** (default: sqlite://$XDG_STATE_HOME/openshell/gateway.db) +: SQLite database URL for gateway state persistence. + +**OPENSHELL_DISABLE_GATEWAY_AUTH** (default: unset) +: Set to **true** to disable mTLS client certificate verification. + +## TLS + +**OPENSHELL_TLS_CERT** (default: auto-generated path) +: Path to server TLS certificate. + +**OPENSHELL_TLS_KEY** (default: auto-generated path) +: Path to server TLS private key. + +**OPENSHELL_TLS_CLIENT_CA** (default: auto-generated path) +: Path to CA certificate for client certificate verification. + +**OPENSHELL_DISABLE_TLS** (default: unset) +: Set to **true** to disable TLS entirely and listen on plaintext + HTTP. Not recommended for production. + +**OPENSHELL_PODMAN_TLS_CA** (default: auto-generated path) +: CA certificate bind-mounted into sandbox containers. + +**OPENSHELL_PODMAN_TLS_CERT** (default: auto-generated path) +: Client certificate bind-mounted into sandbox containers. 
+ +**OPENSHELL_PODMAN_TLS_KEY** (default: auto-generated path) +: Client private key bind-mounted into sandbox containers. + +## Images + +**OPENSHELL_SUPERVISOR_IMAGE** (default: ghcr.io/nvidia/openshell/supervisor:latest) +: OCI image containing the supervisor binary, mounted read-only + into sandbox containers. + +**OPENSHELL_SANDBOX_IMAGE** (default: ghcr.io/nvidia/openshell-community/sandboxes/base:latest) +: Default OCI image for sandbox containers. + +**OPENSHELL_SANDBOX_IMAGE_PULL_POLICY** (default: missing) +: When to pull sandbox images: **always** (every sandbox creation), + **missing** (only if not cached locally), **never** (use cached + only), **newer** (pull if a newer version exists). + +## Podman Driver + +**OPENSHELL_PODMAN_SOCKET** (default: $XDG_RUNTIME_DIR/podman/podman.sock) +: Path to the Podman API Unix socket. + +**OPENSHELL_NETWORK_NAME** (default: openshell) +: Name of the Podman bridge network for sandbox containers. Created + automatically if it does not exist. + +**OPENSHELL_STOP_TIMEOUT** (default: 10) +: Seconds to wait after SIGTERM before sending SIGKILL when stopping + a sandbox container. 
+ +# EXAMPLES + +Change the API port to 9090: + + OPENSHELL_SERVER_PORT=9090 + +Pin sandbox images to a specific version: + + OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:v0.0.37 + OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:v0.0.37 + +Air-gapped deployment (pre-loaded images, no registry access): + + OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never + +Enable debug logging: + + OPENSHELL_LOG_LEVEL=debug + +Use externally-managed TLS certificates: + + OPENSHELL_TLS_CERT=/etc/pki/tls/certs/openshell.crt + OPENSHELL_TLS_KEY=/etc/pki/tls/private/openshell.key + OPENSHELL_TLS_CLIENT_CA=/etc/pki/tls/certs/openshell-ca.crt + +Disable TLS (behind a reverse proxy): + + OPENSHELL_DISABLE_TLS=true + +# SEE ALSO + +**openshell-gateway**(8), **openshell**(1), **systemd.exec**(5) + +Full documentation: *https://docs.nvidia.com/openshell/* diff --git a/deploy/man/openshell.1.md b/deploy/man/openshell.1.md new file mode 100644 index 000000000..65e060899 --- /dev/null +++ b/deploy/man/openshell.1.md @@ -0,0 +1,217 @@ +--- +title: OPENSHELL +section: 1 +header: OpenShell Manual +footer: openshell +date: 2025 +--- + +# NAME + +openshell - CLI for managing OpenShell sandboxes, gateways, and providers + +# SYNOPSIS + +**openshell** \[*OPTIONS*\] *COMMAND* \[*ARGS*\] + +# DESCRIPTION + +**openshell** is the command-line interface for OpenShell, a platform +providing safe, sandboxed runtimes for autonomous AI agents. It manages +the gateway control plane, sandbox lifecycle, credential providers, +network policies, and inference routing. + +The CLI communicates with a gateway server over gRPC. The gateway can +run as a systemd user service (RPM deployment with Podman driver), a +Docker container with embedded K3s, or behind a cloud reverse proxy. + +# COMMANDS + +## Gateway Management + +**gateway start** +: Deploy a new gateway using Docker (not applicable to RPM deployments; + use **systemctl --user start openshell-gateway** instead). 
+ +**gateway stop** +: Stop a Docker-managed gateway (use **systemctl --user stop + openshell-gateway** for RPM deployments). + +**gateway destroy** \[**--name** *NAME*\] +: Destroy a gateway. For RPM deployments, this removes the CLI + registration only. + +**gateway add** *ENDPOINT* \[**--local**\] \[**--name** *NAME*\] \[**--remote** *USER@HOST*\] +: Register an existing gateway with the CLI. + +**gateway select** \[*NAME*\] +: List registered gateways or switch the active gateway. + +**gateway info** \[**--name** *NAME*\] +: Show deployment details for a gateway. + +**gateway login** +: Re-authenticate with a cloud gateway. + +**status** +: Check the health of the active gateway. + +## Sandbox Management + +**sandbox create** \[**--from** *IMAGE*\] \[**--policy** *FILE*\] \[**--provider** *NAME*\] \[**--gpu**\] \[**--upload** *SRC:DST*\] \[**--forward** *PORT*\] \[**--** *COMMAND*\] +: Create a new sandbox. If no gateway exists, auto-bootstraps one + (Docker mode only). + +**sandbox list** \[**--selector** *LABEL*\] +: List all sandboxes on the active gateway. + +**sandbox get** *NAME* +: Show details for a sandbox. + +**sandbox delete** *NAME* \| **--all** +: Delete one or all sandboxes. + +**sandbox connect** *NAME* \[**--editor** *EDITOR*\] +: SSH into a running sandbox. + +**sandbox exec** **-n** *NAME* \[**--workdir** *DIR*\] **--** *COMMAND* +: Execute a command in a sandbox. + +**sandbox upload** *NAME* *LOCAL* *REMOTE* +: Upload files to a sandbox. + +**sandbox download** *NAME* *REMOTE* *LOCAL* +: Download files from a sandbox. + +## Policy Management + +**policy get** *SANDBOX* \[**--full**\] +: Show the active policy for a sandbox. + +**policy set** *SANDBOX* **--policy** *FILE* \[**--wait**\] +: Apply a policy to a sandbox. + +**policy update** *SANDBOX* \[**--add-endpoint** *SPEC*\] \[**--add-allow** *RULE*\] +: Incrementally update a sandbox policy. + +**policy list** *SANDBOX* +: Show policy revision history. 
+ +**policy prove** **--policy** *FILE* \[**--credentials** *FILE*\] +: Verify policy properties. + +## Provider Management + +**provider create** **--name** *NAME* **--type** *TYPE* \[**--from-existing**\] \[**--credential** *KEY=VALUE*\] +: Create a credential provider. + +**provider list** +: List all providers. + +**provider get** *NAME* +: Show provider details. + +**provider update** *NAME* \[**--from-existing**\] \[**--credential** *KEY=VALUE*\] +: Update provider credentials. + +**provider delete** *NAME* +: Delete a provider. + +## Inference Routing + +**inference set** **--provider** *NAME* **--model** *MODEL* +: Configure inference routing. + +**inference get** +: Show current inference configuration. + +**inference update** \[**--model** *MODEL*\] +: Update inference configuration. + +## Other + +**logs** *SANDBOX* \[**--tail**\] +: View sandbox logs. + +**forward start** *PORT* *SANDBOX* \[**-d**\] +: Start port forwarding to a sandbox. + +**forward stop** *PORT* +: Stop port forwarding. + +**forward list** +: List active port forwards. + +**term** +: Open the real-time TUI dashboard. + +**doctor check** \| **logs** \| **exec** \| **llm.txt** +: Diagnostic tools (Docker/K3s mode only; see *TROUBLESHOOTING.md* + in the package documentation for RPM alternatives). + +**completions** *SHELL* +: Generate shell completions (bash, zsh, fish). + +# GLOBAL OPTIONS + +**-g**, **--gateway** *NAME* +: Target a specific gateway by name. + +**--gateway-endpoint** *URL* +: Connect to a gateway by URL directly. + +**-h**, **--help** +: Print help information. + +**-V**, **--version** +: Print version. + +# ENVIRONMENT + +**OPENSHELL_GATEWAY** +: Default gateway name (overrides active gateway). + +**OPENSHELL_GATEWAY_ENDPOINT** +: Direct gateway URL (bypasses metadata lookup). + +**ANTHROPIC_API_KEY**, **OPENAI_API_KEY**, **OPENROUTER_API_KEY** +: API keys discovered by auto-provider creation. + +**GITHUB_TOKEN**, **GH_TOKEN** +: GitHub credentials for provider auto-discovery. 
+ +# FILES + +*~/.config/openshell/gateways/* +: Gateway metadata and mTLS certificates. + +*~/.config/openshell/active_gateway* +: Name of the currently active gateway. + +# EXAMPLES + +Register the local RPM gateway and create a sandbox: + + openshell gateway add --local https://127.0.0.1:8080 + openshell sandbox create -- claude + +List sandboxes and connect to one: + + openshell sandbox list + openshell sandbox connect my-sandbox + +Create a provider from a local environment variable: + + openshell provider create --name openai --type openai --from-existing + +Check gateway health: + + openshell status + +# SEE ALSO + +**openshell-gateway**(8), **openshell-gateway.env**(5) + +Full documentation: *https://docs.nvidia.com/openshell/* + +Run **openshell** *COMMAND* **--help** for detailed help on any command. diff --git a/deploy/rpm/GATEWAY-CONFIG.md b/deploy/rpm/CONFIGURATION.md similarity index 50% rename from deploy/rpm/GATEWAY-CONFIG.md rename to deploy/rpm/CONFIGURATION.md index 75ba5ddb9..724d283de 100644 --- a/deploy/rpm/GATEWAY-CONFIG.md +++ b/deploy/rpm/CONFIGURATION.md @@ -1,27 +1,10 @@ # OpenShell Gateway Configuration (RPM) -This document covers the configuration of the OpenShell gateway when -installed via the RPM package on Fedora and RHEL systems. +Configuration reference for the OpenShell gateway when installed via +the RPM package on Fedora and RHEL systems. -## Quick start - -```shell -# Enable and start the gateway (rootless Podman, mTLS enabled): -systemctl --user enable --now openshell-gateway - -# Verify the gateway is running: -openshell sandbox list - -# Make the service persist across reboots without an active login: -sudo loginctl enable-linger $USER -``` - -On first start, the gateway auto-generates: - -- A self-signed PKI bundle (CA, server cert, client cert) for mTLS -- An SSH handshake secret for sandbox authentication - -No manual certificate setup is required. +For first-time setup, see QUICKSTART.md. 
For troubleshooting, see +TROUBLESHOOTING.md. ## TLS (mTLS) @@ -34,8 +17,8 @@ though it listens on all interfaces (`0.0.0.0`). On first start, the `init-pki.sh` script generates certificates using OpenSSL: -| File | Purpose | Location (user unit) | -|------|---------|---------------------| +| File | Purpose | Location | +|------|---------|----------| | CA certificate | Root of trust | `~/.local/state/openshell/tls/ca.crt` | | CA private key | Signs server and client certs | `~/.local/state/openshell/tls/ca.key` | | Server certificate | Gateway TLS identity | `~/.local/state/openshell/tls/server/tls.crt` | @@ -68,28 +51,33 @@ Names: - `host.docker.internal` - `127.0.0.1` +To connect from a remote machine, you need externally-managed +certificates with additional SANs. See "Remote CLI access" in +TROUBLESHOOTING.md. + ### Using externally-managed certificates To use certificates from an external CA or cert-manager: -1. Place the server cert, key, and CA cert on the filesystem -1. Edit `/etc/sysconfig/openshell-gateway` (system unit) or use - `systemctl --user edit openshell-gateway` (user unit) to override: +1. Place the server cert, key, and CA cert on the filesystem. -```shell -OPENSHELL_TLS_CERT=/path/to/server/tls.crt -OPENSHELL_TLS_KEY=/path/to/server/tls.key -OPENSHELL_TLS_CLIENT_CA=/path/to/ca.crt -``` +1. Edit `~/.config/openshell/gateway.env` or use + `systemctl --user edit openshell-gateway` to override: + + ```shell + OPENSHELL_TLS_CERT=/path/to/server/tls.crt + OPENSHELL_TLS_KEY=/path/to/server/tls.key + OPENSHELL_TLS_CLIENT_CA=/path/to/ca.crt + ``` 1. Place the client cert where the CLI expects it: -``` -~/.config/openshell/gateways/openshell/mtls/ - ca.crt - tls.crt - tls.key -``` + ``` + ~/.config/openshell/gateways/openshell/mtls/ + ca.crt + tls.crt + tls.key + ``` ### Rotating certificates @@ -106,14 +94,14 @@ The gateway regenerates the PKI on next start. To disable TLS (not recommended for production): -1. 
Edit the sysconfig file or use a systemd override: +1. Edit `~/.config/openshell/gateway.env`: -```shell -OPENSHELL_DISABLE_TLS=true -``` + ```shell + OPENSHELL_DISABLE_TLS=true + ``` -1. Remove or comment out the `OPENSHELL_TLS_*` and - `OPENSHELL_PODMAN_TLS_*` variables. +1. Comment out the `OPENSHELL_TLS_*` and `OPENSHELL_PODMAN_TLS_*` + variables if they are set. 1. Restart the gateway. @@ -141,11 +129,19 @@ Inside the container, the supervisor reads them from: - `/etc/openshell/tls/client/tls.crt` - `/etc/openshell/tls/client/tls.key` +On SELinux-enabled systems, the Podman driver automatically applies the +`:z` relabel option to these bind mounts. No manual SELinux +configuration is required. + ## Configuration reference -All settings are controlled via environment variables. The system unit -reads from `/etc/sysconfig/openshell-gateway`. The user unit reads from -`~/.config/openshell/gateway.env` and systemd `Environment=` directives. +All settings are controlled via environment variables. The user unit +reads from `~/.config/openshell/gateway.env` (generated on first start) +and from `Environment=` directives in the systemd unit. + +Values in `gateway.env` override the unit defaults. Use +`systemctl --user edit openshell-gateway` to add overrides that persist +across package upgrades. ### Gateway settings @@ -153,9 +149,13 @@ reads from `/etc/sysconfig/openshell-gateway`. The user unit reads from |----------|---------|-------------| | `OPENSHELL_BIND_HOST` | `0.0.0.0` | IP address to bind all listeners to | | `OPENSHELL_SERVER_PORT` | `8080` | Port for the gRPC/HTTP API | +| `OPENSHELL_HEALTH_PORT` | `0` (disabled) | Port for unauthenticated health endpoints (`/healthz`, `/readyz`). Set to a non-zero value to enable. | +| `OPENSHELL_METRICS_PORT` | `0` (disabled) | Port for Prometheus metrics (`/metrics`). Set to a non-zero value to enable. 
| +| `OPENSHELL_LOG_LEVEL` | `info` | Log level: `trace`, `debug`, `info`, `warn`, `error` | | `OPENSHELL_DRIVERS` | `podman` | Compute driver (`podman`, `docker`, `kubernetes`) | -| `OPENSHELL_DB_URL` | (varies) | SQLite database URL for state persistence | +| `OPENSHELL_DB_URL` | `sqlite://$XDG_STATE_HOME/openshell/gateway.db` | SQLite database URL for state persistence | | `OPENSHELL_SSH_HANDSHAKE_SECRET` | (auto-generated) | Shared secret for sandbox SSH authentication | +| `OPENSHELL_DISABLE_GATEWAY_AUTH` | (unset) | Set to `true` to skip mTLS client certificate checks | ### TLS settings @@ -173,29 +173,75 @@ reads from `/etc/sysconfig/openshell-gateway`. The user unit reads from | Variable | Default | Description | |----------|---------|-------------| -| `OPENSHELL_SUPERVISOR_IMAGE` | `ghcr.io/.../supervisor:latest` | Supervisor binary OCI image | -| `OPENSHELL_SANDBOX_IMAGE` | `ghcr.io/.../sandboxes/base:latest` | Default sandbox base image | +| `OPENSHELL_SUPERVISOR_IMAGE` | `ghcr.io/nvidia/openshell/supervisor:latest` | Supervisor binary OCI image | +| `OPENSHELL_SANDBOX_IMAGE` | `ghcr.io/nvidia/openshell-community/sandboxes/base:latest` | Default sandbox base image | +| `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY` | `missing` | Image pull policy: `always`, `missing`, `never`, `newer` | -## File locations +### Podman driver settings -### User unit (systemctl --user) +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENSHELL_PODMAN_SOCKET` | `$XDG_RUNTIME_DIR/podman/podman.sock` | Podman API Unix socket path | +| `OPENSHELL_NETWORK_NAME` | `openshell` | Podman bridge network name for sandbox containers | +| `OPENSHELL_STOP_TIMEOUT` | `10` | Container stop timeout in seconds (SIGTERM then SIGKILL) | + +### Image management + +The gateway pulls container images automatically on first sandbox +creation. The default pull policy is `missing`, which means images are +pulled once and then cached by Podman. 
+ +To update cached images: + +```shell +podman pull ghcr.io/nvidia/openshell/supervisor:latest +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +``` + +Or set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=always` to pull on every +sandbox creation. + +To pin specific image versions instead of `:latest`: + +```shell +OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:v0.0.37 +OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:v0.0.37 +``` + +For air-gapped environments: + +1. On a connected machine, pull and save the images: + + ```shell + podman pull ghcr.io/nvidia/openshell/supervisor:latest + podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest + podman save -o supervisor.tar ghcr.io/nvidia/openshell/supervisor:latest + podman save -o sandbox.tar ghcr.io/nvidia/openshell-community/sandboxes/base:latest + ``` + +1. Transfer the tarballs to the air-gapped host and load them: + + ```shell + podman load -i supervisor.tar + podman load -i sandbox.tar + ``` + +1. 
Set pull policy to `never`: + + ```shell + OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never + ``` + +## File locations | Purpose | Path | |---------|------| | Gateway binary | `/usr/bin/openshell-gateway` | | CLI binary | `/usr/bin/openshell` | -| Systemd unit | `/usr/lib/systemd/user/openshell-gateway.service` | +| Systemd user unit | `/usr/lib/systemd/user/openshell-gateway.service` | | PKI bootstrap script | `/usr/libexec/openshell/init-pki.sh` | +| Env generator script | `/usr/libexec/openshell/init-gateway-env.sh` | | TLS certificates | `~/.local/state/openshell/tls/` | | CLI client certs | `~/.config/openshell/gateways/openshell/mtls/` | | Gateway database | `~/.local/state/openshell/gateway.db` | -| SSH handshake secret | `~/.config/openshell/gateway.env` | - -### System unit (systemctl) - -| Purpose | Path | -|---------|------| -| Systemd unit | `/usr/lib/systemd/system/openshell-gateway.service` | -| Configuration | `/etc/sysconfig/openshell-gateway` | -| TLS certificates | `/var/lib/openshell/tls/` | -| Gateway database | `/var/lib/openshell/gateway.db` | +| Gateway configuration | `~/.config/openshell/gateway.env` | diff --git a/deploy/rpm/QUICKSTART.md b/deploy/rpm/QUICKSTART.md new file mode 100644 index 000000000..3be0c6fb6 --- /dev/null +++ b/deploy/rpm/QUICKSTART.md @@ -0,0 +1,158 @@ +# OpenShell RPM Quick Start + +Get from `dnf install` to a running sandbox in five minutes. + +## Prerequisites + +### Podman (rootless) + +The gateway uses rootless Podman for sandbox containers. Verify +Podman is installed and the cgroup version is v2: + +```shell +podman --version +podman info --format '{{.Host.CgroupsVersion}}' +``` + +The cgroup version must be `v2`. 
If it reports `v1`, enable the +unified cgroup hierarchy and reboot: + +```shell +sudo grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=1" +sudo reboot +``` + +### Subordinate UID/GID ranges + +Rootless containers require subordinate UID/GID mappings: + +```shell +grep $USER /etc/subuid /etc/subgid +``` + +If empty, add entries: + +```shell +sudo usermod --add-subuids 100000-165535 --add-subgids 100000-165535 $USER +``` + +### Podman socket + +The gateway communicates with Podman over its API socket. Enable +socket activation: + +```shell +systemctl --user enable --now podman.socket +``` + +### Network access + +The gateway pulls container images from ghcr.io on first sandbox +creation. Ensure the host can reach ghcr.io over HTTPS (port 443). + +For air-gapped environments, pre-load images with `podman pull` and +set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never` in +`~/.config/openshell/gateway.env`. See CONFIGURATION.md for details. + +## Start the gateway + +```shell +systemctl --user enable --now openshell-gateway +``` + +On first start, the gateway automatically generates: + +- A self-signed PKI bundle (CA, server cert, client cert) for mTLS +- An SSH handshake secret for sandbox authentication +- A commented configuration file at `~/.config/openshell/gateway.env` + +Verify the service is running: + +```shell +systemctl --user status openshell-gateway +``` + +## Register the gateway with the CLI + +The CLI needs to know where the gateway is. Register it: + +```shell +openshell gateway add --local https://127.0.0.1:8080 +``` + +This discovers the pre-provisioned mTLS certificates at +`~/.config/openshell/gateways/openshell/mtls/` and sets the gateway +as active. + +Verify the connection: + +```shell +openshell status +``` + +## Persist across reboots + +By default, user services stop when you log out. 
To keep the gateway +running after logout and across reboots: + +```shell +sudo loginctl enable-linger $USER +``` + +Without this, the gateway and all running sandboxes are killed when +your login session ends. This is required for any headless or +production use. + +## Create your first sandbox + +Set your API key in the environment, then create a sandbox: + +```shell +export ANTHROPIC_API_KEY=sk-... +openshell sandbox create -- claude +``` + +The CLI detects the agent, prompts to create a credential provider +from your local environment, pulls the sandbox image from ghcr.io, +and connects you to the running sandbox. + +Other agents: + +```shell +openshell sandbox create -- opencode +openshell sandbox create -- codex +``` + +## Set up providers manually + +If you prefer to configure providers before creating sandboxes: + +```shell +# Create a provider from a local environment variable +openshell provider create --name anthropic --type anthropic --from-existing + +# Or supply the credential directly +openshell provider create --name openai --type openai \ + --credential OPENAI_API_KEY=sk-... + +# List configured providers +openshell provider list +``` + +## Configure inference routing (optional) + +To route inference requests through a specific provider and model: + +```shell +openshell inference set --provider openai --model gpt-4 +openshell inference get +``` + +## Next steps + +- See CONFIGURATION.md for TLS settings, environment variables, and + file locations. +- See TROUBLESHOOTING.md for CLI compatibility notes, remote access, + and common issues. +- Run `man openshell` for the CLI reference. +- Run `man openshell-gateway` for the gateway daemon reference. 
diff --git a/deploy/rpm/TROUBLESHOOTING.md b/deploy/rpm/TROUBLESHOOTING.md new file mode 100644 index 000000000..78ef4c475 --- /dev/null +++ b/deploy/rpm/TROUBLESHOOTING.md @@ -0,0 +1,230 @@ +# OpenShell RPM Troubleshooting + +Troubleshooting guide, CLI compatibility notes, remote access setup, +and upgrade procedures for the RPM deployment. + +## CLI compatibility + +The RPM installs the gateway as a systemd user service with the Podman +compute driver. The published online docs and some CLI commands assume +a Docker/K3s deployment model. This section clarifies which commands +work, which do not, and what to use instead. + +### Commands that work normally + +All sandbox, provider, policy, inference, and settings commands +communicate with the gateway over gRPC and work identically regardless +of deployment mode: + +``` +openshell status +openshell sandbox create|list|get|delete|connect|exec +openshell logs +openshell provider create|list|get|update|delete +openshell policy get|set|update|list|prove +openshell inference set|get|update +openshell settings get|set +openshell forward start|stop|list +openshell term +openshell gateway add|select|info +openshell gateway destroy (removes CLI registration only) +``` + +### Commands that do not apply + +These commands manage Docker container lifecycle and are not applicable +to the RPM/systemd deployment. Use the systemd equivalents instead. 
+ +| CLI command | RPM alternative | +|-------------|-----------------| +| `openshell gateway start` | `systemctl --user start openshell-gateway` | +| `openshell gateway stop` | `systemctl --user stop openshell-gateway` | +| `openshell doctor check` | `systemctl --user status openshell-gateway` | +| `openshell doctor logs` | `journalctl --user -u openshell-gateway` | +| `openshell doctor logs --tail` | `journalctl --user -u openshell-gateway -f` | +| `openshell doctor exec` | Not applicable (no K3s container) | + +### Building from local Dockerfiles + +`openshell sandbox create --from ./Dockerfile` builds via Docker and +pushes into K3s containerd. With the Podman driver, build the image +with Podman and reference it directly: + +```shell +podman build -t my-sandbox ./my-dir +openshell sandbox create --from localhost/my-sandbox +``` + +## Remote CLI access + +The auto-generated server certificate only includes SANs for +`localhost`, `127.0.0.1`, and Podman-internal names. To connect from a +different machine, choose one of the following approaches. + +### Option 1: SSH tunnel (simplest) + +Forward the gateway port over SSH and connect via localhost: + +```shell +# On the remote CLI machine: +ssh -L 8080:127.0.0.1:8080 user@gateway-host + +# In another terminal on the same machine: +# Copy the client certs from the gateway host first: +scp -r user@gateway-host:~/.config/openshell/gateways/openshell/mtls/ \ + ~/.config/openshell/gateways/openshell/mtls/ + +openshell gateway add --local https://127.0.0.1:8080 +openshell status +``` + +### Option 2: Externally-managed certificates + +Generate certificates that include the server's hostname or IP in the +SANs. See "Using externally-managed certificates" in CONFIGURATION.md. 
+ +After placing the server and client certs, register from the remote +CLI: + +```shell +# Copy client certs to the remote CLI machine +mkdir -p ~/.config/openshell/gateways/openshell/mtls/ +cp ca.crt tls.crt tls.key ~/.config/openshell/gateways/openshell/mtls/ + +openshell gateway add --local https://<gateway-host>:8080 +``` + +### Firewall + +For remote access, open the gateway port in firewalld: + +```shell +sudo firewall-cmd --add-port=8080/tcp --permanent +sudo firewall-cmd --reload +``` + +For localhost-only access (the default use case), no firewall changes +are needed. Loopback traffic is not filtered by firewalld. + +mTLS prevents unauthenticated access even when the port is open to the +network. + +## Common issues + +### "No active gateway" + +The CLI cannot find a registered gateway. This happens when the +gateway is running but has not been registered with the CLI. + +```shell +openshell gateway add --local https://127.0.0.1:8080 +``` + +### Gateway fails to start + +Check the journal for error details: + +```shell +journalctl --user -u openshell-gateway --no-pager -n 50 +``` + +Common causes: + +**cgroups v1 detected.** The Podman driver requires cgroups v2. +Check the version: + +```shell +stat -fc %T /sys/fs/cgroup +``` + +Expected output: `cgroup2fs`. If it shows `tmpfs`, enable cgroups v2: + +```shell +sudo grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=1" +sudo reboot +``` + +**Podman socket not available.** Ensure socket activation is enabled: + +```shell +systemctl --user enable --now podman.socket +systemctl --user status podman.socket +``` + +**TLS certificate errors.** If certs are corrupted, regenerate them: + +```shell +rm -rf ~/.local/state/openshell/tls +systemctl --user restart openshell-gateway +``` + +### Sandbox creation fails + +**subuid/subgid missing.** Rootless Podman requires subordinate +UID/GID ranges. 
If the journal shows warnings about `/etc/subuid` or +container creation fails: + +```shell +grep $USER /etc/subuid /etc/subgid +# If empty: +sudo usermod --add-subuids 100000-165535 --add-subgids 100000-165535 $USER +``` + +**Image pull failure.** Verify ghcr.io is reachable: + +```shell +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +``` + +### Images not updating + +The default image pull policy is `missing` -- images are pulled once +and cached. To update: + +```shell +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +podman pull ghcr.io/nvidia/openshell/supervisor:latest +``` + +Or set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=always` in +`~/.config/openshell/gateway.env` and restart the gateway. + +### Gateway stops on logout + +Enable lingering so the service survives logout: + +```shell +sudo loginctl enable-linger $USER +``` + +## SELinux + +No SELinux configuration is required on stock Fedora or RHEL. The +Podman driver automatically applies the `:z` relabel option to TLS +bind mounts when SELinux is detected, allowing sandbox containers to +read the certificates through the MAC policy. + +## Upgrading + +After upgrading the RPM packages: + +```shell +sudo dnf update openshell openshell-gateway +systemctl --user restart openshell-gateway +``` + +The SQLite database schema is auto-migrated on startup. Running +sandboxes are stopped during the restart. + +The `gateway.env` file is not overwritten during upgrades. The +`init-gateway-env.sh` script is idempotent and only generates the file +on first start. New configuration options from newer versions can be +added manually by referencing CONFIGURATION.md or running +`openshell-gateway --help`. 
+ +To pick up new container images after an upgrade: + +```shell +podman pull ghcr.io/nvidia/openshell/supervisor:latest +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest +``` diff --git a/deploy/rpm/init-gateway-env.sh b/deploy/rpm/init-gateway-env.sh new file mode 100644 index 000000000..7f7287c48 --- /dev/null +++ b/deploy/rpm/init-gateway-env.sh @@ -0,0 +1,113 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Generate the gateway environment configuration file on first start. +# +# Called from the systemd ExecStartPre directive to bootstrap the +# gateway configuration. Idempotent: exits immediately if the file +# already exists. +# +# Usage: +# init-gateway-env.sh +# +# The generated file contains an auto-generated SSH handshake secret +# and commented defaults for all gateway environment variables. + +set -euo pipefail + +ENV_FILE="${1:?Usage: init-gateway-env.sh }" + +# ── Idempotent: skip if env file already exists ───────────────────── +if [ -f "${ENV_FILE}" ]; then + exit 0 +fi + +# ── Create parent directory ───────────────────────────────────────── +mkdir -p "$(dirname "${ENV_FILE}")" + +# ── Generate SSH handshake secret ─────────────────────────────────── +SECRET=$(od -An -tx1 -N32 /dev/urandom | tr -dc 0-9a-f) + +# ── Write environment file ────────────────────────────────────────── +cat > "${ENV_FILE}" << EOF +# OpenShell Gateway Environment Configuration +# Generated on first start. Edit freely; this file is not overwritten. +# +# Run 'openshell-gateway --help' for the full list of options. +# See /usr/share/doc/openshell-gateway/ for guides. + +# ---- Required ---- + +# Shared secret for gateway-to-sandbox SSH handshake authentication. +# Auto-generated on first start. 
To regenerate: +# openssl rand -hex 32 +OPENSHELL_SSH_HANDSHAKE_SECRET=${SECRET} + +# ---- Optional (uncomment to override defaults) ---- + +# Database URL for gateway state persistence. +# Default for the user unit: sqlite://\$XDG_STATE_HOME/openshell/gateway.db +#OPENSHELL_DB_URL=sqlite:///path/to/gateway.db + +# Compute driver: podman (default for RPM), docker, kubernetes. +#OPENSHELL_DRIVERS=podman + +# Bind address. 0.0.0.0 listens on all interfaces; mTLS prevents +# unauthenticated access. +#OPENSHELL_BIND_HOST=0.0.0.0 + +# API port (default: 8080). +#OPENSHELL_SERVER_PORT=8080 + +# Log level: trace, debug, info, warn, error. +#OPENSHELL_LOG_LEVEL=info + +# ---- Images ---- + +# Supervisor binary OCI image (mounted read-only into sandboxes). +#OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest + +# Default sandbox base image. +#OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest + +# Image pull policy: always, missing (default), never, newer. +# Use 'always' to pick up new tags automatically. +# Use 'never' for air-gapped environments with pre-loaded images. +#OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=missing + +# ---- TLS (mTLS enabled by default) ---- +# PKI is auto-generated by init-pki.sh on first start. Client certs are +# placed in ~/.config/openshell/gateways/openshell/mtls/ so the CLI +# discovers them automatically. +# +# To use externally-managed certs, uncomment and edit the paths below. +# To rotate certs, delete ~/.local/state/openshell/tls/ and restart. +# To disable TLS (NOT RECOMMENDED), uncomment the next line and +# comment out all OPENSHELL_TLS_* and OPENSHELL_PODMAN_TLS_* variables. +#OPENSHELL_DISABLE_TLS=true + +# Server TLS (gateway listens with these certs). 
+#OPENSHELL_TLS_CERT=\$XDG_STATE_HOME/openshell/tls/server/tls.crt +#OPENSHELL_TLS_KEY=\$XDG_STATE_HOME/openshell/tls/server/tls.key +#OPENSHELL_TLS_CLIENT_CA=\$XDG_STATE_HOME/openshell/tls/ca.crt + +# Podman driver: client certs bind-mounted into sandbox containers. +#OPENSHELL_PODMAN_TLS_CA=\$XDG_STATE_HOME/openshell/tls/ca.crt +#OPENSHELL_PODMAN_TLS_CERT=\$XDG_STATE_HOME/openshell/tls/client/tls.crt +#OPENSHELL_PODMAN_TLS_KEY=\$XDG_STATE_HOME/openshell/tls/client/tls.key + +# ---- Podman driver ---- + +# Podman API Unix socket path. +#OPENSHELL_PODMAN_SOCKET=\$XDG_RUNTIME_DIR/podman/podman.sock + +# Podman bridge network name for sandbox containers. +#OPENSHELL_NETWORK_NAME=openshell + +# Container stop timeout in seconds (SIGTERM then SIGKILL). +#OPENSHELL_STOP_TIMEOUT=10 +EOF + +chmod 600 "${ENV_FILE}" +echo "Gateway environment generated: ${ENV_FILE}" diff --git a/openshell.spec b/openshell.spec index dff4adb43..0cbef914a 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260430141223488997.rpm.46.g7ff80c76%{?dist} +Release: 1.20260501111549922934.rpm.51.g7c400fa8%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 @@ -35,6 +35,9 @@ BuildRequires: clang-devel BuildRequires: z3-devel BuildRequires: systemd-rpm-macros +# Man page generation +BuildRequires: pandoc + # Python sub-package build dependencies BuildRequires: python3-devel @@ -104,6 +107,11 @@ export CARGO_BUILD_JOBS=%{_smp_build_ncpus} export OPENSHELL_IMAGE_TAG=latest cargo build --release --bin openshell --bin openshell-gateway +# Build man pages from markdown +pandoc -s -t man deploy/man/openshell.1.md -o openshell.1 +pandoc -s -t man deploy/man/openshell-gateway.8.md -o openshell-gateway.8 +pandoc -s -t man deploy/man/openshell-gateway.env.5.md -o openshell-gateway.env.5 + %install # --- CLI binary --- install -Dpm 0755 target/release/%{name} %{buildroot}%{_bindir}/%{name} @@ -111,64 +119,35 @@ install 
-Dpm 0755 target/release/%{name} %{buildroot}%{_bindir}/%{name} # --- Gateway binary --- install -Dpm 0755 target/release/%{name}-gateway %{buildroot}%{_bindir}/%{name}-gateway -# --- Gateway systemd unit --- -install -d %{buildroot}%{_unitdir} -cat > %{buildroot}%{_unitdir}/%{name}-gateway.service << 'EOF' -[Unit] -Description=OpenShell Gateway -Documentation=https://github.com/NVIDIA/OpenShell -After=network-online.target podman.service -Requires=podman.service - -[Service] -Type=exec -EnvironmentFile=/etc/sysconfig/openshell-gateway -ExecStart=/usr/bin/openshell-gateway -StateDirectory=openshell -Restart=on-failure -RestartSec=5 - -# Security hardening -NoNewPrivileges=yes -ProtectSystem=strict -PrivateTmp=yes -RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX - -[Install] -WantedBy=multi-user.target -EOF - # --- Gateway systemd user unit (rootless Podman) --- # Installed to the systemd user unit directory so any user can run: # systemctl --user enable --now openshell-gateway.service -# This will automatically start podman.service via Requires= dependency. +# Podman socket activation provides the container API. install -d %{buildroot}%{_userunitdir} cat > %{buildroot}%{_userunitdir}/%{name}-gateway.service << 'EOF' [Unit] Description=OpenShell Gateway (user) Documentation=https://github.com/NVIDIA/OpenShell -After=podman.service -Requires=podman.service +After=podman.socket +Requires=podman.socket [Service] Type=exec # Self-contained defaults for rootless operation with mTLS. # -# PKI is auto-generated on first start. Client certs are placed in -# ~/.config/openshell/gateways/openshell/mtls/ so the CLI discovers -# them automatically. See /usr/share/doc/openshell-gateway/ for details. -# -# The SSH handshake secret is auto-generated on first start into -# ~/.config/openshell/gateway.env (mode 0600). To override, edit -# that file or use: systemctl --user edit openshell-gateway.service +# PKI and gateway.env are auto-generated on first start. 
Client certs +# are placed in ~/.config/openshell/gateways/openshell/mtls/ so the +# CLI discovers them automatically. +# See /usr/share/doc/openshell-gateway/ for details. # Auto-generate PKI on first start if not present. # %%S expands to $XDG_STATE_HOME (~/.local/state) in user units. ExecStartPre=%{_libexecdir}/openshell/init-pki.sh %%S/openshell/tls -# Auto-generate SSH handshake secret on first start if not present. +# Auto-generate gateway.env (SSH handshake secret + commented config +# reference) on first start if not present. # %%E expands to $XDG_CONFIG_HOME (~/.config) in user units. -ExecStartPre=/bin/sh -c 'ENV=%%E/openshell/gateway.env; [ -f "$ENV" ] || { mkdir -p %%E/openshell && echo "OPENSHELL_SSH_HANDSHAKE_SECRET=$(od -An -tx1 -N32 /dev/urandom | tr -dc 0-9a-f)" > "$ENV" && chmod 600 "$ENV"; }' +ExecStartPre=%{_libexecdir}/openshell/init-gateway-env.sh %%E/openshell/gateway.env EnvironmentFile=-%%E/openshell/gateway.env Environment=OPENSHELL_BIND_HOST=0.0.0.0 Environment=OPENSHELL_DRIVERS=podman @@ -198,78 +177,21 @@ RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX WantedBy=default.target EOF -# --- Gateway environment file --- -# Provides defaults for the Podman driver and GHCR image references. -# Mode 0640: contains the SSH handshake secret -- must not be world-readable. -# Admins can override these values by editing this file. -install -d %{buildroot}%{_sysconfdir}/sysconfig -install -pm 0640 /dev/null %{buildroot}%{_sysconfdir}/sysconfig/%{name}-gateway -cat > %{buildroot}%{_sysconfdir}/sysconfig/%{name}-gateway << 'EOF' -# OpenShell Gateway configuration -# See: openshell-gateway --help for all available options. - -# ---- Required settings ---- - -# Shared secret for gateway-to-sandbox SSH handshake authentication. -# REQUIRED: Generate a value before starting the service: -# openssl rand -hex 32 -# The same secret must be shared with every sandbox that connects to -# this gateway. 
-OPENSHELL_SSH_HANDSHAKE_SECRET= - -# Database URL for gateway state persistence. -# For the system unit this defaults to /var/lib/openshell/gateway.db. -# The user unit overrides this to ~/.local/state/openshell/gateway.db. -OPENSHELL_DB_URL=sqlite:///var/lib/openshell/gateway.db - -# ---- Optional settings ---- - -# Compute driver: use Podman for sandbox container lifecycle. -OPENSHELL_DRIVERS=podman - -# Supervisor image mounted into sandbox containers. -OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest - -# Default sandbox base image. -OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest - -# Bind address. Default 0.0.0.0 listens on all interfaces; mTLS -# prevents unauthenticated access. -OPENSHELL_BIND_HOST=0.0.0.0 - -# ---- TLS (mTLS enabled by default) ---- -# A self-signed PKI is auto-generated on first start by init-pki.sh. -# Client certs are placed in ~/.config/openshell/gateways/openshell/mtls/ -# so the CLI discovers them automatically. -# -# To use externally-managed certs, replace the paths below. -# To rotate certs, delete the TLS state directory and restart. -# To disable TLS (NOT RECOMMENDED), uncomment the line below and -# remove or comment out the OPENSHELL_TLS_* and OPENSHELL_PODMAN_TLS_* -# variables. -# OPENSHELL_DISABLE_TLS=true - -# Server TLS (gateway listens with these certs). -OPENSHELL_TLS_CERT=/var/lib/openshell/tls/server/tls.crt -OPENSHELL_TLS_KEY=/var/lib/openshell/tls/server/tls.key -OPENSHELL_TLS_CLIENT_CA=/var/lib/openshell/tls/ca.crt - -# Podman driver: client certs bind-mounted into sandbox containers. 
-OPENSHELL_PODMAN_TLS_CA=/var/lib/openshell/tls/ca.crt -OPENSHELL_PODMAN_TLS_CERT=/var/lib/openshell/tls/client/tls.crt -OPENSHELL_PODMAN_TLS_KEY=/var/lib/openshell/tls/client/tls.key -EOF - -# --- Gateway state directory --- -install -d %{buildroot}%{_sharedstatedir}/%{name} - -# --- PKI bootstrap script --- +# --- PKI bootstrap script and gateway env generator --- install -d %{buildroot}%{_libexecdir}/%{name} install -pm 0755 deploy/rpm/init-pki.sh %{buildroot}%{_libexecdir}/%{name}/init-pki.sh +install -pm 0755 deploy/rpm/init-gateway-env.sh %{buildroot}%{_libexecdir}/%{name}/init-gateway-env.sh # --- Gateway documentation --- install -d %{buildroot}%{_docdir}/%{name}-gateway -install -pm 0644 deploy/rpm/GATEWAY-CONFIG.md %{buildroot}%{_docdir}/%{name}-gateway/GATEWAY-CONFIG.md +install -pm 0644 deploy/rpm/QUICKSTART.md %{buildroot}%{_docdir}/%{name}-gateway/QUICKSTART.md +install -pm 0644 deploy/rpm/CONFIGURATION.md %{buildroot}%{_docdir}/%{name}-gateway/CONFIGURATION.md +install -pm 0644 deploy/rpm/TROUBLESHOOTING.md %{buildroot}%{_docdir}/%{name}-gateway/TROUBLESHOOTING.md + +# --- Man pages --- +install -Dpm 0644 openshell.1 %{buildroot}%{_mandir}/man1/openshell.1 +install -Dpm 0644 openshell-gateway.8 %{buildroot}%{_mandir}/man8/openshell-gateway.8 +install -Dpm 0644 openshell-gateway.env.5 %{buildroot}%{_mandir}/man5/openshell-gateway.env.5 # --- Python SDK --- # Install Python SDK modules (test files are intentionally excluded) @@ -315,38 +237,31 @@ touch %{buildroot}%{python3_sitelib}/%{name}-%{version}.dist-info/RECORD PYTHONPATH=%{buildroot}%{python3_sitelib} %{python3} -c "from importlib.metadata import version; v = version('openshell'); print(v); assert v == '%{version}', f'expected %{version}, got {v}'" %post gateway -# Generate SSH handshake secret on fresh install if not already set. -# Uses /dev/urandom to avoid requiring openssl at install time. 
-SYSCONFIG=%{_sysconfdir}/sysconfig/%{name}-gateway -if [ -f "$SYSCONFIG" ] && grep -q '^OPENSHELL_SSH_HANDSHAKE_SECRET=$' "$SYSCONFIG" 2>/dev/null; then - SECRET=$(head -c 32 /dev/urandom | od -A n -t x1 | tr -d ' \n') - sed -i "s/^OPENSHELL_SSH_HANDSHAKE_SECRET=$/OPENSHELL_SSH_HANDSHAKE_SECRET=${SECRET}/" "$SYSCONFIG" -fi -%systemd_post %{name}-gateway.service %systemd_user_post %{name}-gateway.service %preun gateway -%systemd_preun %{name}-gateway.service %systemd_user_preun %{name}-gateway.service %postun gateway -%systemd_postun_with_restart %{name}-gateway.service %systemd_user_postun_with_restart %{name}-gateway.service %files %license LICENSE %doc README.md %{_bindir}/%{name} +%{_mandir}/man1/openshell.1* %files gateway %license LICENSE -%doc %{_docdir}/%{name}-gateway/GATEWAY-CONFIG.md +%doc %{_docdir}/%{name}-gateway/QUICKSTART.md +%doc %{_docdir}/%{name}-gateway/CONFIGURATION.md +%doc %{_docdir}/%{name}-gateway/TROUBLESHOOTING.md %{_bindir}/%{name}-gateway -%{_unitdir}/%{name}-gateway.service %{_userunitdir}/%{name}-gateway.service %{_libexecdir}/%{name}/init-pki.sh -%attr(0640,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/%{name}-gateway -%dir %{_sharedstatedir}/%{name} +%{_libexecdir}/%{name}/init-gateway-env.sh +%{_mandir}/man8/openshell-gateway.8* +%{_mandir}/man5/openshell-gateway.env.5* %files -n python3-%{name} %license LICENSE From e7c4151df9a2030e727e63d4b0223a9de8e89f4f Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 1 May 2026 14:42:56 -0500 Subject: [PATCH 16/18] fix: resolve auto-merge compilation errors from rebase Fix GatewayMetadata struct literals that were auto-merged without the OIDC fields added in main. Use ..Default::default() consistently. Also fix clippy map_unwrap_or lint in mTLS cert detection and remove extra blank line in cli.rs from conflict resolution. 
--- crates/openshell-bootstrap/src/metadata.rs | 15 ++----------- crates/openshell-cli/src/run.rs | 25 ++++------------------ crates/openshell-server/src/cli.rs | 1 - 3 files changed, 6 insertions(+), 35 deletions(-) diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs index 02e01b17a..2bf1352d4 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -534,14 +534,9 @@ mod tests { let meta = GatewayMetadata { name: "test".to_string(), gateway_endpoint: "https://127.0.0.1:8080".to_string(), - is_remote: false, gateway_port: 8080, - remote_host: None, - resolved_host: None, - auth_mode: None, - edge_team_domain: None, - edge_auth_url: None, client_lifecycle_managed: Some(false), + ..Default::default() }; let json = serde_json::to_string(&meta).unwrap(); assert!(json.contains(r#""client_lifecycle_managed":false"#)); @@ -554,14 +549,8 @@ mod tests { let meta = GatewayMetadata { name: "test".to_string(), gateway_endpoint: "https://127.0.0.1:8080".to_string(), - is_remote: false, gateway_port: 8080, - remote_host: None, - resolved_host: None, - auth_mode: None, - edge_team_domain: None, - edge_auth_url: None, - client_lifecycle_managed: None, + ..Default::default() }; let json = serde_json::to_string(&meta).unwrap(); assert!(!json.contains("client_lifecycle_managed")); diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 30cc36e86..ab9a9db65 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -878,7 +878,7 @@ fn mtls_certs_exist_for_endpoint(name: &str, endpoint: &str) -> bool { name }; openshell_core::paths::xdg_config_dir() - .map(|d| { + .is_ok_and(|d| { let mtls = d .join("openshell") .join("gateways") @@ -888,7 +888,6 @@ fn mtls_certs_exist_for_endpoint(name: &str, endpoint: &str) -> bool { && mtls.join("tls.crt").is_file() && mtls.join("tls.key").is_file() }) - .unwrap_or(false) } fn 
plaintext_gateway_is_remote(endpoint: &str, remote: Option<&str>, local: bool) -> bool { @@ -6208,14 +6207,9 @@ mod tests { let metadata = GatewayMetadata { name: "localhost".to_string(), gateway_endpoint: "http://localhost:8080".to_string(), - is_remote: false, - gateway_port: 0, - remote_host: None, - resolved_host: None, auth_mode: Some("plaintext".to_string()), - edge_team_domain: None, - edge_auth_url: None, client_lifecycle_managed: Some(false), + ..Default::default() }; let target = resolve_gateway_control_target_from(Some(metadata), None); assert!(matches!(target, GatewayControlTarget::ExternalRegistration)); @@ -6227,14 +6221,9 @@ mod tests { let metadata = GatewayMetadata { name: "openshell".to_string(), gateway_endpoint: "https://127.0.0.1:8080".to_string(), - is_remote: false, gateway_port: 8080, - remote_host: None, - resolved_host: None, - auth_mode: None, - edge_team_domain: None, - edge_auth_url: None, client_lifecycle_managed: Some(true), + ..Default::default() }; let target = resolve_gateway_control_target_from(Some(metadata), None); assert!(matches!(target, GatewayControlTarget::Local)); @@ -6247,14 +6236,8 @@ mod tests { let metadata = GatewayMetadata { name: "openshell".to_string(), gateway_endpoint: "https://127.0.0.1:8080".to_string(), - is_remote: false, gateway_port: 8080, - remote_host: None, - resolved_host: None, - auth_mode: None, - edge_team_domain: None, - edge_auth_url: None, - client_lifecycle_managed: None, + ..Default::default() }; let target = resolve_gateway_control_target_from(Some(metadata), None); assert!(matches!(target, GatewayControlTarget::Local)); diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index d61df1116..040cb48ad 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -22,7 +22,6 @@ use crate::{run_server, tracing_bus::TracingLogBus}; #[command(version = openshell_core::VERSION)] #[command(about = "OpenShell gRPC/HTTP server", long_about = 
None)] struct Args { - /// Port to bind the server to. #[arg(long, default_value_t = DEFAULT_SERVER_PORT, env = "OPENSHELL_SERVER_PORT")] port: u16, From 051c31ccee122733a0d5f23056e80514fcd5fc62 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 1 May 2026 16:10:20 -0500 Subject: [PATCH 17/18] feat(rpm): add cargo-rpm-macros for bundled crate provides Integrate cargo-rpm-macros >= 25 to automatically generate Provides: bundled(crate(...)) metadata required for Fedora package review. Replaces manual .cargo/config.toml with %cargo_prep -v vendor and adds %cargo_vendor_manifest and %cargo_license macros to produce cargo-vendor.txt and LICENSE.dependencies at build time. --- openshell.spec | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/openshell.spec b/openshell.spec index 0cbef914a..ab93c5fa3 100644 --- a/openshell.spec +++ b/openshell.spec @@ -9,7 +9,7 @@ Name: openshell Version: 0.0.37 -Release: 1.20260501111549922934.rpm.51.g7c400fa8%{?dist} +Release: 1.20260501160805739969.rpm.73.ge7c4151d%{?dist} Summary: Safe, sandboxed runtimes for autonomous AI agents License: Apache-2.0 @@ -26,6 +26,7 @@ ExclusiveArch: x86_64 aarch64 # Rust toolchain via additional_repos in the COPR build config. BuildRequires: rust >= 1.88 BuildRequires: cargo +BuildRequires: cargo-rpm-macros >= 25 BuildRequires: gcc BuildRequires: gcc-c++ BuildRequires: make @@ -82,18 +83,9 @@ management, agent execution, and inference routing via gRPC. 
%prep %autosetup -n %{name}-%{version} -# Extract vendored Cargo dependencies +# Extract vendored Cargo dependencies and configure offline build tar xf %{SOURCE1} - -# Configure Cargo to use vendored dependencies for offline build -mkdir -p .cargo -cat > .cargo/config.toml << 'EOF' -[source.crates-io] -replace-with = "vendored-sources" - -[source.vendored-sources] -directory = "vendor" -EOF +%cargo_prep -v vendor # Patch workspace version from placeholder to actual version sed -i 's/^version = "0.0.0"/version = "%{version}"/' Cargo.toml @@ -107,6 +99,13 @@ export CARGO_BUILD_JOBS=%{_smp_build_ncpus} export OPENSHELL_IMAGE_TAG=latest cargo build --release --bin openshell --bin openshell-gateway +# Generate vendored crate manifest and license metadata. +# cargo-vendor.txt is consumed by an RPM generator (from cargo-rpm-macros) +# to emit Provides: bundled(crate(...)) = version for every vendored dep. +%cargo_vendor_manifest +%{cargo_license_summary} +%{cargo_license} > LICENSE.dependencies + # Build man pages from markdown pandoc -s -t man deploy/man/openshell.1.md -o openshell.1 pandoc -s -t man deploy/man/openshell-gateway.8.md -o openshell-gateway.8 @@ -247,12 +246,16 @@ PYTHONPATH=%{buildroot}%{python3_sitelib} %{python3} -c "from importlib.metadata %files %license LICENSE +%license LICENSE.dependencies +%license cargo-vendor.txt %doc README.md %{_bindir}/%{name} %{_mandir}/man1/openshell.1* %files gateway %license LICENSE +%license LICENSE.dependencies +%license cargo-vendor.txt %doc %{_docdir}/%{name}-gateway/QUICKSTART.md %doc %{_docdir}/%{name}-gateway/CONFIGURATION.md %doc %{_docdir}/%{name}-gateway/TROUBLESHOOTING.md From dccc44b8e82c4d8209b3c37c12e9ff0bb63444b1 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 1 May 2026 17:01:21 -0500 Subject: [PATCH 18/18] feat(ci): add RPM package build and publish via Packit CLI Build RPMs from source using packit build locally in a Fedora container, mirroring the Debian package workflow pattern. 
RPMs are built independently (no dependency on pre-built binary jobs), uploaded as GHA artifacts, and included in GitHub Releases alongside debs. The existing Packit/COPR build path (.packit.yaml) is unchanged and continues to serve Fedora repository consumers independently. --- .github/workflows/release-dev.yml | 31 +++++++++++--- .github/workflows/release-tag.yml | 19 ++++++++- .github/workflows/rpm-package.yml | 71 +++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/rpm-package.yml diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index 5563a67eb..3ccce68db 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -636,12 +636,20 @@ jobs: checkout-ref: ${{ github.sha }} secrets: inherit + build-rpm: + name: Build RPM Packages + needs: [compute-versions] + uses: ./.github/workflows/rpm-package.yml + with: + checkout-ref: ${{ github.sha }} + secrets: inherit + # --------------------------------------------------------------------------- # Create / update the dev GitHub Release with CLI binaries and wheels # --------------------------------------------------------------------------- release-dev: name: Release Dev - needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, build-deb] + needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, build-deb, build-rpm] runs-on: build-amd64 timeout-minutes: 10 outputs: @@ -684,6 +692,13 @@ jobs: path: release/ merge-multiple: true + - name: Download RPM package artifacts + uses: actions/download-artifact@v4 + with: + pattern: rpm-linux-* + path: release/ + merge-multiple: true + - name: Capture wheel filenames 
id: wheel_filenames run: | @@ -701,6 +716,7 @@ jobs: openshell-aarch64-unknown-linux-musl.tar.gz \ openshell-aarch64-apple-darwin.tar.gz \ openshell_*.deb \ + openshell-*.rpm \ *.whl > openshell-checksums-sha256.txt cat openshell-checksums-sha256.txt sha256sum \ @@ -713,7 +729,7 @@ jobs: openshell-sandbox-aarch64-unknown-linux-gnu.tar.gz > openshell-sandbox-checksums-sha256.txt cat openshell-sandbox-checksums-sha256.txt - - name: Prune stale wheel and deb assets from dev release + - name: Prune stale wheel, deb, and rpm assets from dev release uses: actions/github-script@v7 env: WHEEL_VERSION: ${{ needs.compute-versions.outputs.python_version }} @@ -745,13 +761,17 @@ jobs: core.info(` ${String(a.id).padStart(12)} ${a.name}`); } - // Delete stale wheels - let kept = 0, deleted = 0, debDeleted = 0; + // Delete stale wheels, debs, and rpms + let kept = 0, deleted = 0, debDeleted = 0, rpmDeleted = 0; for (const asset of assets) { if (asset.name.endsWith('.deb')) { core.info(`Deleting stale deb package: ${asset.name} (id=${asset.id})`); await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); debDeleted++; + } else if (asset.name.endsWith('.rpm')) { + core.info(`Deleting stale rpm package: ${asset.name} (id=${asset.id})`); + await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id }); + rpmDeleted++; } else if (asset.name.endsWith('.whl') && asset.name.startsWith(currentPrefix)) { core.info(`Keeping current wheel: ${asset.name}`); kept++; @@ -761,7 +781,7 @@ jobs: deleted++; } } - core.info(`Summary: kept_wheels=${kept}, deleted_wheels=${deleted}, deleted_debs=${debDeleted}`); + core.info(`Summary: kept_wheels=${kept}, deleted_wheels=${deleted}, deleted_debs=${debDeleted}, deleted_rpms=${rpmDeleted}`); - name: Move dev tag run: | @@ -793,6 +813,7 @@ jobs: release/openshell-aarch64-unknown-linux-musl.tar.gz release/openshell-aarch64-apple-darwin.tar.gz release/openshell_*.deb + release/openshell-*.rpm 
release/openshell-gateway-x86_64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-apple-darwin.tar.gz diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml index 7df792cba..79d7cbf61 100644 --- a/.github/workflows/release-tag.yml +++ b/.github/workflows/release-tag.yml @@ -663,12 +663,20 @@ jobs: checkout-ref: ${{ inputs.tag || github.ref }} secrets: inherit + build-rpm: + name: Build RPM Packages + needs: [compute-versions] + uses: ./.github/workflows/rpm-package.yml + with: + checkout-ref: ${{ inputs.tag || github.ref }} + secrets: inherit + # --------------------------------------------------------------------------- # Create a tagged GitHub Release with CLI binaries and wheels # --------------------------------------------------------------------------- release: name: Release - needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, tag-ghcr-release, build-deb] + needs: [compute-versions, build-cli-linux, build-cli-macos, build-gateway-binary-linux, build-gateway-binary-macos, build-supervisor-binary-linux, build-python-wheels-linux, build-python-wheel-macos, tag-ghcr-release, build-deb, build-rpm] runs-on: build-amd64 timeout-minutes: 10 outputs: @@ -713,6 +721,13 @@ jobs: path: release/ merge-multiple: true + - name: Download RPM package artifacts + uses: actions/download-artifact@v4 + with: + pattern: rpm-linux-* + path: release/ + merge-multiple: true + - name: Capture wheel filenames id: wheel_filenames run: | @@ -730,6 +745,7 @@ jobs: openshell-aarch64-unknown-linux-musl.tar.gz \ openshell-aarch64-apple-darwin.tar.gz \ openshell_*.deb \ + openshell-*.rpm \ *.whl > openshell-checksums-sha256.txt cat openshell-checksums-sha256.txt sha256sum \ @@ -763,6 +779,7 @@ jobs: release/openshell-aarch64-unknown-linux-musl.tar.gz 
release/openshell-aarch64-apple-darwin.tar.gz release/openshell_*.deb + release/openshell-*.rpm release/openshell-gateway-x86_64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-unknown-linux-gnu.tar.gz release/openshell-gateway-aarch64-apple-darwin.tar.gz diff --git a/.github/workflows/rpm-package.yml b/.github/workflows/rpm-package.yml new file mode 100644 index 000000000..b80882d75 --- /dev/null +++ b/.github/workflows/rpm-package.yml @@ -0,0 +1,71 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +name: RPM Package + +on: + workflow_call: + inputs: + checkout-ref: + required: true + type: string + +permissions: + contents: read + +defaults: + run: + shell: bash + +jobs: + build-rpm-linux: + name: Build RPM Package (Linux ${{ matrix.arch }}) + strategy: + matrix: + include: + - arch: x86_64 + runner: build-amd64 + - arch: aarch64 + runner: build-arm64 + runs-on: ${{ matrix.runner }} + timeout-minutes: 60 + container: + image: fedora:latest + steps: + - name: Install build dependencies + run: | + dnf install -y \ + packit rpm-build \ + rust cargo gcc gcc-c++ make cmake pkg-config \ + clang-devel z3-devel systemd-rpm-macros \ + pandoc python3-devel git-core \ + cargo-rpm-macros + + - uses: actions/checkout@v6 + with: + ref: ${{ inputs.checkout-ref }} + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Build RPMs via Packit + run: packit build locally + + - name: Collect RPM artifacts + run: | + set -euo pipefail + mkdir -p artifacts + find ~/rpmbuild/RPMS/ -name '*.rpm' -exec cp {} artifacts/ \; + echo "=== Built RPMs ===" + ls -lah artifacts/ + + - name: Upload RPM artifacts + uses: actions/upload-artifact@v4 + with: + name: rpm-linux-${{ matrix.arch }} + path: artifacts/*.rpm + retention-days: 5