Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions flows/flow-14-live-obol-base-sepolia.sh
Original file line number Diff line number Diff line change
Expand Up @@ -745,12 +745,40 @@ if [ "$import_rc" -ne 0 ]; then
fi
pass "Alice remote-signer seeded with seller wallet"

# Guard: confirm the remote-signer pod was actually rolled by the wallet
# import. Helm does NOT re-roll a Deployment when only a Secret's data
# changed, so a regression that drops the explicit kubectl rollout-restart
# would leave the pod running with the chart's bootstrap keystore-password
# Secret in env. The pod would then sign with the throwaway address and
# `obol sell register` would fail 5 minutes later with "gas required
# exceeds allowance (0)" — a confusing, slow failure. This step fails
# fast with a clear diagnostic instead.
step "Alice: remote-signer pod rolled by wallet import (age < 120s)"
# Inspect the NEWEST pod, not whichever one the API lists first: right
# after a rollout the replaced pod can still be listed (Terminating) next
# to its successor, and picking it would report a stale age even though
# the deployment was rolled. Sorting by creationTimestamp and taking the
# last item selects the most recently created pod.
set +e
pod_start=$(alice kubectl get pods -n hermes-obol-agent \
  -l app.kubernetes.io/name=remote-signer \
  --sort-by=.metadata.creationTimestamp \
  -o jsonpath='{.items[-1:].status.startTime}' 2>/dev/null)
set -e
if [ -z "$pod_start" ]; then
  fail "remote-signer pod not found (label app.kubernetes.io/name=remote-signer)"
  emit_metrics; exit 1
fi
# Convert the RFC 3339 startTime to epoch seconds. `date -d` is tried
# first; python3 is the fallback for platforms whose date lacks -d.
# The trailing `|| true` keeps `set -e` from aborting the script here when
# both converters fail, so the validation below can report it properly.
pod_epoch=$(date -u -d "$pod_start" +%s 2>/dev/null || python3 -c "import datetime,sys; print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$pod_start" || true)
case "$pod_epoch" in
  ''|*[!0-9]*)
    # Empty or non-numeric: the arithmetic below would be a syntax error.
    fail "could not convert remote-signer pod startTime '$pod_start' to epoch seconds (need a date with -d support or python3)"
    emit_metrics; exit 1
    ;;
esac
now_epoch=$(date -u +%s)
pod_age=$((now_epoch - pod_epoch))
if [ "$pod_age" -gt 120 ]; then
  fail "remote-signer pod is ${pod_age}s old — wallet import did not roll the deployment (likely stale keystore-password Secret). Run 'obol kubectl -n hermes-obol-agent rollout restart deployment/remote-signer' and retry."
  emit_metrics; exit 1
fi
pass "remote-signer pod is ${pod_age}s old (rolled by wallet import)"

step "Alice: drive ERC-8004 registration (obol sell register)"
# 5-minute hard timeout: the on-chain tx + WaitForAgent + SetMetadata
# should complete in ~30-60s; anything beyond that is a hang we want
# to surface, not silently block the run. `timeout` is an external
# program and cannot see the `alice()` bash function, so call the
# binary directly with the same env the function exports.
set +e
register_out=$(timeout 300 \
env OBOL_DEVELOPMENT=true OBOL_NONINTERACTIVE=true \
OBOL_CONFIG_DIR="$ALICE_DIR/config" \
Expand All @@ -761,6 +789,7 @@ register_out=$(timeout 300 \
--endpoint "$TUNNEL_URL" \
--name "Live OBOL Base Sepolia Test Inference" 2>&1)
register_rc=$?
set -e
printf '%s\n' "$register_out" | tail -10
if [ "$register_rc" -ne 0 ]; then
fail "obol sell register failed (exit $register_rc) — offer will stay AwaitingExternalRegistration"
Expand Down
16 changes: 16 additions & 0 deletions internal/agentruntime/charts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package agentruntime

// RemoteSignerChartVersion is the single source of truth for the
// `remote-signer` Helm chart version pinned by both the Hermes and the
// OpenClaw deployments. Bump it here and nowhere else: every consumer
// references this constant, so a single edit moves them all in lockstep.
//
// Chart 0.3.1 ships remote-signer image `v0.2.0`, which accepts the
// canonical-string signer contract (chain_id, value, etc. serialized
// as JSON strings) introduced by PR #359. Chart 0.3.0 ships `v0.1.0`,
// which only accepts the legacy u64 contract and breaks `obol sell
// register` for current obol-stack with HTTP 422 "chain_id: invalid
// type: string \"84532\", expected u64".
//
// renovate: datasource=helm depName=remote-signer registryUrl=https://obolnetwork.github.io/helm-charts/
const RemoteSignerChartVersion = "0.3.1"
4 changes: 1 addition & 3 deletions internal/hermes/hermes.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ const (
gatewayTokenFileName = ".gateway-token"
obolSkillsDirName = "obol-skills"

// renovate: datasource=helm depName=remote-signer registryUrl=https://obolnetwork.github.io/helm-charts/
remoteSignerChartVersion = "0.3.0"
// renovate: datasource=helm depName=raw registryUrl=https://bedag.github.io/helm-charts/
rawChartVersion = "2.0.2"

Expand Down Expand Up @@ -625,7 +623,7 @@ releases:
version: %s
values:
- values-remote-signer.yaml
`, namespace, rawChartVersion, valuesFileName, namespace, remoteSignerChartVersion)
`, namespace, rawChartVersion, valuesFileName, namespace, agentruntime.RemoteSignerChartVersion)
}

func dashboardHostname(id string) string {
Expand Down
59 changes: 59 additions & 0 deletions internal/hermes/wallet_import.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/ObolNetwork/obol-stack/internal/agentruntime"
"github.com/ObolNetwork/obol-stack/internal/config"
"github.com/ObolNetwork/obol-stack/internal/kubectl"
"github.com/ObolNetwork/obol-stack/internal/ui"
ethcrypto "github.com/ethereum/go-ethereum/crypto"
)
Expand All @@ -23,6 +24,16 @@ type ImportPrivateKeyWalletOptions struct {
ApplyCluster bool
}

// Indirection seams so tests can spy on / replace the cluster-side calls
// without standing up a real k3d cluster. Production wires them to the real
// helmfile-sync + kubectl rollout helpers. Production code in this file calls
// the *Fn variables rather than the functions they point at, so reassigning
// one in a test swaps the behavior at every call site.
var (
// syncFn is called by ImportPrivateKeyWalletCmd when opts.ApplyCluster is set.
syncFn = Sync
// restartHermesRemoteSignerFn is called after syncFn succeeds.
restartHermesRemoteSignerFn = restartHermesRemoteSigner
// ensureVolumeWritableFn is called before the keystore volume is touched
// when archiving a replaced keystore.
ensureVolumeWritableFn = ensureVolumeWritable
// fixRuntimeVolumeOwnershipFn is deferred alongside ensureVolumeWritableFn
// so it runs once the archive step returns.
fixRuntimeVolumeOwnershipFn = fixRuntimeVolumeOwnership
)

// ImportPrivateKeyWalletCmd imports an existing private key as the
// remote-signer wallet for a Hermes instance. Mirror of the OpenClaw path.
func ImportPrivateKeyWalletCmd(cfg *config.Config, id string, opts ImportPrivateKeyWalletOptions, u *ui.UI) error {
Expand Down Expand Up @@ -68,9 +79,49 @@ func ImportPrivateKeyWalletCmd(cfg *config.Config, id string, opts ImportPrivate
u.Detail("Address", wallet.Address)
u.Detail("Instance", id)

// When the cluster is live, helmfile-sync so the new keystore password
// Secret reaches the pod, then explicitly roll the deployment — helm
// does not roll on Secret-data-only changes, so the pod would keep
// decrypting with the old chart-bootstrap password and remote-signer
// calls would sign with the throwaway address.
if opts.ApplyCluster {
u.Blank()
u.Info("Applying changes to cluster (helmfile sync)...")
if err := syncFn(cfg, id, u); err != nil {
u.Warnf("helmfile sync failed: %v", err)
u.Printf("Run 'obol hermes sync %s' manually before issuing remote-signer calls.", id)
} else {
restartHermesRemoteSignerFn(cfg, id, u)
}
}

return nil
}

// restartHermesRemoteSigner issues `kubectl rollout restart` against the
// remote-signer deployment in the Hermes instance's namespace and then waits
// up to 120s (`kubectl rollout status --timeout=120s`) for the rollout to
// finish, so the pod re-reads the freshly-applied keystore-password Secret.
//
// The function is best-effort and returns nothing: a failure of either
// kubectl call is reported through u as a warning and ends the function
// early. Wallet metadata and values files have already been written by the
// time this runs, so a later `obol hermes sync` can finish the job.
func restartHermesRemoteSigner(cfg *config.Config, id string, u *ui.UI) {
	ns := agentruntime.Namespace(agentruntime.Hermes, id)
	bin, kubeconfigPath := kubectl.Paths(cfg)

	// Trigger the rollout. On failure, tell the user how to do it by hand.
	restartErr := kubectl.RunSilent(bin, kubeconfigPath,
		"rollout", "restart", "deployment/remote-signer", "-n", ns,
	)
	if restartErr != nil {
		u.Warnf("Could not restart remote-signer (cluster may not be running): %v", restartErr)
		u.Printf("Run 'obol kubectl -n %s rollout restart deployment/remote-signer' to apply the new keystore.", ns)
		return
	}

	// Block until the rollout completes or the 120s kubectl timeout expires.
	statusErr := kubectl.RunSilent(bin, kubeconfigPath,
		"rollout", "status", "deployment/remote-signer", "-n", ns, "--timeout=120s",
	)
	if statusErr != nil {
		u.Warnf("remote-signer rollout did not complete in 120s: %v", statusErr)
		return
	}

	u.Success("Remote-signer restarted")
}

// ImportWalletFromPrivateKey provisions an existing Ethereum private key as
// the remote-signer wallet for a Hermes instance.
func ImportWalletFromPrivateKey(cfg *config.Config, id, privateKeyHex string, u *ui.UI) (*WalletInfo, error) {
Expand Down Expand Up @@ -130,6 +181,14 @@ func archiveReplacedHermesKeystore(cfg *config.Config, id string, existingWallet
}

dir := agentruntime.KeystoreVolumePath(cfg, agentruntime.Hermes, id)

// The keystores volume is normally container-owned (uid 10000, mode 700)
// after provisionKeystoreToVolume's fixRuntimeVolumeOwnership. Bookend the
// stat/mkdir/rename with the same ownership flip provision uses, otherwise
// the host process can't even traverse the directory.
ensureVolumeWritableFn(cfg, dir, u)
defer fixRuntimeVolumeOwnershipFn(cfg, dir, u)

oldPath := filepath.Join(dir, existingWallet.KeystoreUUID+".json")
if _, err := os.Stat(oldPath); err != nil {
if errors.Is(err, os.ErrNotExist) {
Expand Down
Loading