From 6c5106a51c74f98c645179c5fb5b23ac4dc42571 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 29 Apr 2026 13:11:55 +0800 Subject: [PATCH 1/6] fix(hermes): bookend wallet-import archival with k3d ownership flip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit archiveReplacedHermesKeystore stat/mkdir/rename the host-path PVC directly, but provisionKeystoreToVolume's last step (fixRuntimeVolumeOwnership) leaves the keystores dir as mode 700 owned by the container's uid 10000. The host-side process (uid 1000) then cannot traverse the dir, so os.Stat returns EACCES and the wrapping caller surfaces "failed to archive replaced keystore: stat …: permission denied". Mirror the pattern provisionKeystoreToVolume already uses: call ensureVolumeWritable up front (chowns to host uid via k3d node-exec), defer fixRuntimeVolumeOwnership so all return paths restore container ownership for the remote-signer pod. The bug pre-dates the obol-wallet-import flow rewrite; flow-14 only started exercising the path on Alice once the --private-key-file escape hatch was removed. --- internal/hermes/wallet_import.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/internal/hermes/wallet_import.go b/internal/hermes/wallet_import.go index 5a770571..21a5d137 100644 --- a/internal/hermes/wallet_import.go +++ b/internal/hermes/wallet_import.go @@ -130,6 +130,14 @@ func archiveReplacedHermesKeystore(cfg *config.Config, id string, existingWallet } dir := agentruntime.KeystoreVolumePath(cfg, agentruntime.Hermes, id) + + // The keystores volume is normally container-owned (uid 10000, mode 700) + // after provisionKeystoreToVolume's fixRuntimeVolumeOwnership. Bookend the + // stat/mkdir/rename with the same ownership flip provision uses, otherwise + // the host process can't even traverse the directory. + ensureVolumeWritable(cfg, dir, u) + defer fixRuntimeVolumeOwnership(cfg, dir, u) + oldPath := filepath.Join(dir, existingWallet.KeystoreUUID+".json") if _, err := os.Stat(oldPath); err != nil { if errors.Is(err, os.ErrNotExist) { From a214050d9a8d88040156c6a0beb505411d310f7e Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 29 Apr 2026 14:15:37 +0800 Subject: [PATCH 2/6] =?UTF-8?q?fix(hermes):=20honor=20ApplyCluster=20?= =?UTF-8?q?=E2=80=94=20helmfile-sync=20after=20wallet=20import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ImportPrivateKeyWalletOptions.ApplyCluster has been plumbed all the way from cmd/obol/wallet.go since the OpenClaw → Hermes routing fix, but ImportPrivateKeyWalletCmd never actually consumed it. Effect: `obol wallet import` against a live cluster wrote the new keystore to the host-path PVC and updated values-remote-signer.yaml on disk, but the running remote-signer pod kept decrypting with the old chart-bootstrap keystore-password Secret and signed with the chart's throwaway address (e.g. 0xb6aF…). On a flow-14 register tx that surfaced as "gas required exceeds allowance (0)" — chart key has no funds. Mirror OpenClaw's finalizeWalletProvision pattern: when the cluster is reachable, run hermes.Sync to helmfile-sync the deployment. helmfile reapplies the keystore-password Secret with the new value and helm rolls the remote-signer deployment, so the pod restarts against the freshly-imported keystore. Failure to sync is best-effort — emits a warning and a recovery hint instead of failing the import outright (cluster might come up later). --- internal/hermes/wallet_import.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/internal/hermes/wallet_import.go b/internal/hermes/wallet_import.go index 21a5d137..f7a277ee 100644 --- a/internal/hermes/wallet_import.go +++ b/internal/hermes/wallet_import.go @@ -68,6 +68,19 @@ func ImportPrivateKeyWalletCmd(cfg *config.Config, id string, opts ImportPrivate u.Detail("Address", wallet.Address) u.Detail("Instance", id) + // When the cluster is live, helmfile-sync so the new keystore password + // Secret reaches the pod and helm rolls the remote-signer deployment. + // Without this, the pod keeps decrypting with the old chart-bootstrap + // password and remote-signer calls sign with the throwaway address. + if opts.ApplyCluster { + u.Blank() + u.Info("Applying changes to cluster (helmfile sync)...") + if err := Sync(cfg, id, u); err != nil { + u.Warnf("helmfile sync failed: %v", err) + u.Printf("Run 'obol hermes sync %s' manually before issuing remote-signer calls.", id) + } + } + return nil } From b17995a769ad067e113e722c50a99d5122cb80bf Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 29 Apr 2026 16:07:12 +0800 Subject: [PATCH 3/6] fix(hermes,flow-14): roll remote-signer after import + protect register from set -e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups to the helmfile-sync addition (a214050): 1. helm doesn't roll a Deployment when only a Secret's data changed — the Deployment template still references the same Secret name, so helm patches the Secret in-place and leaves the pod running with the stale env. After Sync, run an explicit `kubectl rollout restart deployment/remote-signer` and wait up to 120s for the new pod to be ready. Mirrors OpenClaw's restartRemoteSigner semantics. 2. flow-14 step 23 ran `register_out=$(timeout 300 obol sell register …)` under set -e from lib.sh. obol sell register correctly exits 1 on chain failure, but the assignment-with-command-substitution under errexit kills the script before the if-check can fire fail() and emit_metrics — the run looked like a silent death at "STEP: [23]" instead of a clean FAIL with metrics. Wrap in set +e/-e the same way step 22 (wallet import) already does. Together with 6c5106a (archive bookend) and a214050 (Sync on ApplyCluster), `obol wallet import` against a live Hermes cluster now fully replaces the chart bootstrap key end-to-end without flow-level workarounds. --- flows/flow-14-live-obol-base-sepolia.sh | 2 ++ internal/hermes/wallet_import.go | 34 ++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/flows/flow-14-live-obol-base-sepolia.sh b/flows/flow-14-live-obol-base-sepolia.sh index c7f2898a..17dff4d2 100755 --- a/flows/flow-14-live-obol-base-sepolia.sh +++ b/flows/flow-14-live-obol-base-sepolia.sh @@ -751,6 +751,7 @@ step "Alice: drive ERC-8004 registration (obol sell register)" # to surface, not silently block the run. `timeout` is an external # program and cannot see the `alice()` bash function, so call the # binary directly with the same env the function exports. +set +e register_out=$(timeout 300 \ env OBOL_DEVELOPMENT=true OBOL_NONINTERACTIVE=true \ OBOL_CONFIG_DIR="$ALICE_DIR/config" \ @@ -761,6 +762,7 @@ register_out=$(timeout 300 \ --endpoint "$TUNNEL_URL" \ --name "Live OBOL Base Sepolia Test Inference" 2>&1) register_rc=$? +set -e printf '%s\n' "$register_out" | tail -10 if [ "$register_rc" -ne 0 ]; then fail "obol sell register failed (exit $register_rc) — offer will stay AwaitingExternalRegistration" diff --git a/internal/hermes/wallet_import.go b/internal/hermes/wallet_import.go index f7a277ee..91cdb21d 100644 --- a/internal/hermes/wallet_import.go +++ b/internal/hermes/wallet_import.go @@ -11,6 +11,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/agentruntime" "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/kubectl" "github.com/ObolNetwork/obol-stack/internal/ui" ethcrypto "github.com/ethereum/go-ethereum/crypto" ) @@ -69,21 +70,48 @@ func ImportPrivateKeyWalletCmd(cfg *config.Config, id string, opts ImportPrivate u.Detail("Instance", id) // When the cluster is live, helmfile-sync so the new keystore password - // Secret reaches the pod and helm rolls the remote-signer deployment. - // Without this, the pod keeps decrypting with the old chart-bootstrap - // password and remote-signer calls sign with the throwaway address. + // Secret reaches the pod, then explicitly roll the deployment — helm + // does not roll on Secret-data-only changes, so the pod would keep + // decrypting with the old chart-bootstrap password and remote-signer + // calls would sign with the throwaway address. if opts.ApplyCluster { u.Blank() u.Info("Applying changes to cluster (helmfile sync)...") if err := Sync(cfg, id, u); err != nil { u.Warnf("helmfile sync failed: %v", err) u.Printf("Run 'obol hermes sync %s' manually before issuing remote-signer calls.", id) + } else { + restartHermesRemoteSigner(cfg, id, u) } } return nil } +// restartHermesRemoteSigner kicks a rollout-restart on the remote-signer +// deployment so the pod re-reads the freshly-applied keystore-password Secret. +// Best-effort: a still-coming-up cluster will surface the error to the caller +// as a warning, not a hard failure, since wallet metadata + values files have +// already been written and a later `obol hermes sync` can finish the job. +func restartHermesRemoteSigner(cfg *config.Config, id string, u *ui.UI) { + namespace := agentruntime.Namespace(agentruntime.Hermes, id) + kubectlBin, kubeconfig := kubectl.Paths(cfg) + if err := kubectl.RunSilent(kubectlBin, kubeconfig, + "rollout", "restart", "deployment/remote-signer", "-n", namespace, + ); err != nil { + u.Warnf("Could not restart remote-signer (cluster may not be running): %v", err) + u.Printf("Run 'obol kubectl -n %s rollout restart deployment/remote-signer' to apply the new keystore.", namespace) + return + } + if err := kubectl.RunSilent(kubectlBin, kubeconfig, + "rollout", "status", "deployment/remote-signer", "-n", namespace, "--timeout=120s", + ); err != nil { + u.Warnf("remote-signer rollout did not complete in 120s: %v", err) + return + } + u.Success("Remote-signer restarted") +} + // ImportWalletFromPrivateKey provisions an existing Ethereum private key as // the remote-signer wallet for a Hermes instance. func ImportWalletFromPrivateKey(cfg *config.Config, id, privateKeyHex string, u *ui.UI) (*WalletInfo, error) { From 769086de0022bae2d6669dc4b4fe7d462a09ec4d Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 29 Apr 2026 16:22:56 +0800 Subject: [PATCH 4/6] test(hermes): unit tests + flow-14 guard for wallet-import cluster wiring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests cover the regression classes surfaced in this PR: - TestArchiveReplacedHermesKeystore_NilExisting / SameUUID — happy short-circuit paths must NOT call the k3d node-exec helpers. - TestArchiveReplacedHermesKeystore_BookendOrder — guards 6c5106a: the (ensureVolumeWritable → fixRuntimeVolumeOwnership) bookend MUST run in order, and the deferred fix MUST fire on every return path including the os.Stat ENOENT early-return. - TestArchiveReplacedHermesKeystore_RenamesToReplaced — happy-path archival writes the file under /replaced/-.json and removes the original. - TestImportPrivateKeyWalletCmd_ApplyClusterFalseSkipsCluster — guards the inverse of a214050: the pre-cluster bootstrap path must NOT helmfile-sync or rollout-restart. - TestImportPrivateKeyWalletCmd_ApplyClusterTrueRollsPod — primary guard: ApplyCluster=true must invoke both Sync AND restartHermesRemoteSigner (helm doesn't roll on Secret-data changes, so the rollout-restart is non-optional). - TestImportPrivateKeyWalletCmd_SyncFailureSkipsRestart — best-effort contract: Sync error → skip restart, do NOT fail the import as a whole; on-disk artifacts let a later `obol hermes sync` finish. Tests use indirection seams (var syncFn, restartHermesRemoteSignerFn, ensureVolumeWritableFn, fixRuntimeVolumeOwnershipFn) to spy/replace without standing up a real k3d cluster. Flow-level guard: a new step between 22 (wallet import) and 23 (register) asserts the remote-signer pod's startTime is within 120s of now. If a regression drops the explicit kubectl rollout-restart, the pod stays old → assertion fails fast with a clear "wallet import did not roll the deployment" diagnostic, instead of falling through to the 5-minute "gas required exceeds allowance (0)" symptom. --- flows/flow-14-live-obol-base-sepolia.sh | 27 +++ internal/hermes/wallet_import.go | 18 +- internal/hermes/wallet_import_test.go | 249 ++++++++++++++++++++++++ 3 files changed, 290 insertions(+), 4 deletions(-) create mode 100644 internal/hermes/wallet_import_test.go diff --git a/flows/flow-14-live-obol-base-sepolia.sh b/flows/flow-14-live-obol-base-sepolia.sh index 17dff4d2..6e8ccb4b 100755 --- a/flows/flow-14-live-obol-base-sepolia.sh +++ b/flows/flow-14-live-obol-base-sepolia.sh @@ -745,6 +745,33 @@ if [ "$import_rc" -ne 0 ]; then fi pass "Alice remote-signer seeded with seller wallet" +# Guard: confirm the remote-signer pod was actually rolled by the wallet +# import. Helm does NOT re-roll a Deployment when only a Secret's data +# changed, so a regression that drops the explicit kubectl rollout-restart +# would leave the pod running with the chart's bootstrap keystore-password +# Secret in env. The pod would then sign with the throwaway address and +# `obol sell register` would fail 5 minutes later with "gas required +# exceeds allowance (0)" — a confusing, slow failure. This step fails +# fast with a clear diagnostic instead. +step "Alice: remote-signer pod rolled by wallet import (age < 120s)" +set +e +pod_start=$(alice kubectl get pods -n hermes-obol-agent \ + -l app.kubernetes.io/name=remote-signer \ + -o jsonpath='{.items[0].status.startTime}' 2>/dev/null) +set -e +if [ -z "$pod_start" ]; then + fail "remote-signer pod not found (label app.kubernetes.io/name=remote-signer)" + emit_metrics; exit 1 +fi +pod_epoch=$(date -u -d "$pod_start" +%s 2>/dev/null || python3 -c "import datetime,sys; print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$pod_start") +now_epoch=$(date -u +%s) +pod_age=$((now_epoch - pod_epoch)) +if [ "$pod_age" -gt 120 ]; then + fail "remote-signer pod is ${pod_age}s old — wallet import did not roll the deployment (likely stale keystore-password Secret). Run 'obol kubectl -n hermes-obol-agent rollout restart deployment/remote-signer' and retry." + emit_metrics; exit 1 +fi +pass "remote-signer pod is ${pod_age}s old (rolled by wallet import)" + step "Alice: drive ERC-8004 registration (obol sell register)" # 5-minute hard timeout: the on-chain tx + WaitForAgent + SetMetadata # should complete in ~30-60s; anything beyond that is a hang we want diff --git a/internal/hermes/wallet_import.go b/internal/hermes/wallet_import.go index 91cdb21d..e72cce3c 100644 --- a/internal/hermes/wallet_import.go +++ b/internal/hermes/wallet_import.go @@ -24,6 +24,16 @@ type ImportPrivateKeyWalletOptions struct { ApplyCluster bool } +// Indirection seams so tests can spy on / replace the cluster-side calls +// without standing up a real k3d cluster. Production wires them to the real +// helmfile-sync + kubectl rollout helpers. +var ( + syncFn = Sync + restartHermesRemoteSignerFn = restartHermesRemoteSigner + ensureVolumeWritableFn = ensureVolumeWritable + fixRuntimeVolumeOwnershipFn = fixRuntimeVolumeOwnership +) + // ImportPrivateKeyWalletCmd imports an existing private key as the // remote-signer wallet for a Hermes instance. Mirror of the OpenClaw path. func ImportPrivateKeyWalletCmd(cfg *config.Config, id string, opts ImportPrivateKeyWalletOptions, u *ui.UI) error { @@ -77,11 +87,11 @@ func ImportPrivateKeyWalletCmd(cfg *config.Config, id string, opts ImportPrivate if opts.ApplyCluster { u.Blank() u.Info("Applying changes to cluster (helmfile sync)...") - if err := Sync(cfg, id, u); err != nil { + if err := syncFn(cfg, id, u); err != nil { u.Warnf("helmfile sync failed: %v", err) u.Printf("Run 'obol hermes sync %s' manually before issuing remote-signer calls.", id) } else { - restartHermesRemoteSigner(cfg, id, u) + restartHermesRemoteSignerFn(cfg, id, u) } } @@ -176,8 +186,8 @@ func archiveReplacedHermesKeystore(cfg *config.Config, id string, existingWallet // after provisionKeystoreToVolume's fixRuntimeVolumeOwnership. Bookend the // stat/mkdir/rename with the same ownership flip provision uses, otherwise // the host process can't even traverse the directory. - ensureVolumeWritable(cfg, dir, u) - defer fixRuntimeVolumeOwnership(cfg, dir, u) + ensureVolumeWritableFn(cfg, dir, u) + defer fixRuntimeVolumeOwnershipFn(cfg, dir, u) oldPath := filepath.Join(dir, existingWallet.KeystoreUUID+".json") if _, err := os.Stat(oldPath); err != nil { diff --git a/internal/hermes/wallet_import_test.go b/internal/hermes/wallet_import_test.go new file mode 100644 index 00000000..2d190176 --- /dev/null +++ b/internal/hermes/wallet_import_test.go @@ -0,0 +1,249 @@ +package hermes + +import ( + "os" + "path/filepath" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/agentruntime" + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/ui" +) + +// Anvil account #1 — well-known Hardhat test mnemonic. Public on every +// install, never used on mainnet, safe to commit. +const testPrivateKeyHex = "ac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" + +func newTestUI() *ui.UI { return ui.New(false) } + +func walletImportTestConfig(t *testing.T, id string) (*config.Config, string) { + t.Helper() + dir := t.TempDir() + cfg := &config.Config{ConfigDir: dir, DataDir: dir, BinDir: dir} + deployDir := DeploymentPath(cfg, id) + if err := os.MkdirAll(deployDir, 0o755); err != nil { + t.Fatalf("mkdir deploy: %v", err) + } + keystoreDir := agentruntime.KeystoreVolumePath(cfg, agentruntime.Hermes, id) + if err := os.MkdirAll(keystoreDir, 0o700); err != nil { + t.Fatalf("mkdir keystore: %v", err) + } + return cfg, deployDir +} + +func writeKeyFile(t *testing.T) string { + t.Helper() + f := filepath.Join(t.TempDir(), "key.hex") + if err := os.WriteFile(f, []byte(testPrivateKeyHex), 0o600); err != nil { + t.Fatalf("write key: %v", err) + } + return f +} + +// stubVolumeOwnership replaces the k3d-node-exec helpers with no-op spies that +// record call order. Tests use this to assert the bookend ordering inside +// archiveReplacedHermesKeystore without needing a real cluster. +func stubVolumeOwnership(t *testing.T) *[]string { + t.Helper() + calls := []string{} + origEnsure := ensureVolumeWritableFn + origFix := fixRuntimeVolumeOwnershipFn + t.Cleanup(func() { + ensureVolumeWritableFn = origEnsure + fixRuntimeVolumeOwnershipFn = origFix + }) + ensureVolumeWritableFn = func(_ *config.Config, _ string, _ *ui.UI) { + calls = append(calls, "ensureVolumeWritable") + } + fixRuntimeVolumeOwnershipFn = func(_ *config.Config, _ string, _ *ui.UI) { + calls = append(calls, "fixRuntimeVolumeOwnership") + } + return &calls +} + +// stubClusterApply replaces Sync + restartHermesRemoteSigner with spies that +// record invocation. Sync's return value is configurable so tests can drive +// the success and failure branches independently. +func stubClusterApply(t *testing.T, syncErr error) (syncCalled, restartCalled *bool) { + t.Helper() + sc, rc := false, false + origSync := syncFn + origRestart := restartHermesRemoteSignerFn + t.Cleanup(func() { + syncFn = origSync + restartHermesRemoteSignerFn = origRestart + }) + syncFn = func(_ *config.Config, _ string, _ *ui.UI) error { + sc = true + return syncErr + } + restartHermesRemoteSignerFn = func(_ *config.Config, _ string, _ *ui.UI) { + rc = true + } + return &sc, &rc +} + +// TestArchiveReplacedHermesKeystore_NilExisting guards the happy path: no +// previous wallet on disk → archive is a no-op and must not call the +// k3d-node-exec helpers. +func TestArchiveReplacedHermesKeystore_NilExisting(t *testing.T) { + cfg, _ := walletImportTestConfig(t, "obol-agent") + calls := stubVolumeOwnership(t) + if err := archiveReplacedHermesKeystore(cfg, "obol-agent", nil, "new-uuid", newTestUI()); err != nil { + t.Fatalf("archive: %v", err) + } + if len(*calls) != 0 { + t.Fatalf("expected zero ownership calls, got %v", *calls) + } +} + +// TestArchiveReplacedHermesKeystore_SameUUID covers the case where the import +// re-uses the existing keystore UUID (a re-import idempotency case). Archive +// must short-circuit before any filesystem mutation. +func TestArchiveReplacedHermesKeystore_SameUUID(t *testing.T) { + cfg, _ := walletImportTestConfig(t, "obol-agent") + calls := stubVolumeOwnership(t) + existing := &WalletInfo{KeystoreUUID: "same-uuid"} + if err := archiveReplacedHermesKeystore(cfg, "obol-agent", existing, "same-uuid", newTestUI()); err != nil { + t.Fatalf("archive: %v", err) + } + if len(*calls) != 0 { + t.Fatalf("expected zero ownership calls, got %v", *calls) + } +} + +// TestArchiveReplacedHermesKeystore_BookendOrder is the regression guard for +// the fix in commit 6c5106a: the bookend pair MUST run in the order +// (ensure → … → fix), and the deferred fix MUST run even when the function +// returns early because the old keystore file is missing. +func TestArchiveReplacedHermesKeystore_BookendOrder(t *testing.T) { + cfg, _ := walletImportTestConfig(t, "obol-agent") + calls := stubVolumeOwnership(t) + // existingWallet points at a UUID whose file does not exist on disk → + // archive returns early after the os.Stat ENOENT branch. Even on that + // early-return path the deferred fix call must still fire. + existing := &WalletInfo{KeystoreUUID: "missing-uuid"} + if err := archiveReplacedHermesKeystore(cfg, "obol-agent", existing, "new-uuid", newTestUI()); err != nil { + t.Fatalf("archive: %v", err) + } + if len(*calls) != 2 || (*calls)[0] != "ensureVolumeWritable" || (*calls)[1] != "fixRuntimeVolumeOwnership" { + t.Fatalf("expected [ensureVolumeWritable, fixRuntimeVolumeOwnership], got %v", *calls) + } +} + +// TestArchiveReplacedHermesKeystore_RenamesToReplaced exercises the happy +// path: a real keystore file exists in the volume, archive renames it under +// the `replaced/` subdir with a timestamp suffix, and the original path no +// longer exists. +func TestArchiveReplacedHermesKeystore_RenamesToReplaced(t *testing.T) { + cfg, _ := walletImportTestConfig(t, "obol-agent") + stubVolumeOwnership(t) + dir := agentruntime.KeystoreVolumePath(cfg, agentruntime.Hermes, "obol-agent") + oldUUID := "old-uuid-aaaa" + oldPath := filepath.Join(dir, oldUUID+".json") + if err := os.WriteFile(oldPath, []byte(`{"placeholder":"keystore"}`), 0o600); err != nil { + t.Fatalf("write old keystore: %v", err) + } + + existing := &WalletInfo{KeystoreUUID: oldUUID} + if err := archiveReplacedHermesKeystore(cfg, "obol-agent", existing, "new-uuid", newTestUI()); err != nil { + t.Fatalf("archive: %v", err) + } + + if _, err := os.Stat(oldPath); !os.IsNotExist(err) { + t.Fatalf("old keystore should be gone, got err=%v", err) + } + entries, err := os.ReadDir(filepath.Join(dir, "replaced")) + if err != nil { + t.Fatalf("read replaced dir: %v", err) + } + if len(entries) != 1 { + t.Fatalf("expected exactly 1 archived keystore, got %d", len(entries)) + } +} + +// TestImportPrivateKeyWalletCmd_ApplyClusterFalseSkipsCluster guards the +// inverse direction of the ApplyCluster wiring (a214050): when the caller +// did not opt in to cluster-side apply (e.g. pre-cluster-up bootstrap), we +// must NOT call helmfile sync or rollout-restart. +func TestImportPrivateKeyWalletCmd_ApplyClusterFalseSkipsCluster(t *testing.T) { + cfg, _ := walletImportTestConfig(t, "obol-agent") + stubVolumeOwnership(t) + syncCalled, restartCalled := stubClusterApply(t, nil) + + keyFile := writeKeyFile(t) + if err := ImportPrivateKeyWalletCmd(cfg, "obol-agent", ImportPrivateKeyWalletOptions{ + PrivateKeyFile: keyFile, + Force: true, + ApplyCluster: false, + }, newTestUI()); err != nil { + t.Fatalf("import: %v", err) + } + if *syncCalled { + t.Fatal("Sync was called even though ApplyCluster=false") + } + if *restartCalled { + t.Fatal("restartHermesRemoteSigner was called even though ApplyCluster=false") + } +} + +// TestImportPrivateKeyWalletCmd_ApplyClusterTrueRollsPod is the regression +// guard for the pair of fixes in PR #397: ApplyCluster=true MUST trigger +// helmfile-sync AND the explicit rollout-restart that follows on success +// (helm doesn't roll on Secret-data-only changes, so without the restart +// the pod keeps signing with the chart's bootstrap address). +func TestImportPrivateKeyWalletCmd_ApplyClusterTrueRollsPod(t *testing.T) { + cfg, _ := walletImportTestConfig(t, "obol-agent") + stubVolumeOwnership(t) + syncCalled, restartCalled := stubClusterApply(t, nil) + + keyFile := writeKeyFile(t) + if err := ImportPrivateKeyWalletCmd(cfg, "obol-agent", ImportPrivateKeyWalletOptions{ + PrivateKeyFile: keyFile, + Force: true, + ApplyCluster: true, + }, newTestUI()); err != nil { + t.Fatalf("import: %v", err) + } + if !*syncCalled { + t.Fatal("Sync was NOT called with ApplyCluster=true") + } + if !*restartCalled { + t.Fatal("restartHermesRemoteSigner was NOT called after successful Sync") + } +} + +// TestImportPrivateKeyWalletCmd_SyncFailureSkipsRestart documents the +// best-effort contract: if helmfile-sync errored, do NOT proceed to +// rollout-restart (it would just roll the pod against a stale Secret). +// The wallet import as a whole still succeeds — the on-disk artifacts +// (wallet.json, keystore, values-remote-signer.yaml) are written, so a +// later `obol hermes sync` can finish the job. +func TestImportPrivateKeyWalletCmd_SyncFailureSkipsRestart(t *testing.T) { + cfg, _ := walletImportTestConfig(t, "obol-agent") + stubVolumeOwnership(t) + syncCalled, restartCalled := stubClusterApply(t, errSyncFailed) + + keyFile := writeKeyFile(t) + if err := ImportPrivateKeyWalletCmd(cfg, "obol-agent", ImportPrivateKeyWalletOptions{ + PrivateKeyFile: keyFile, + Force: true, + ApplyCluster: true, + }, newTestUI()); err != nil { + t.Fatalf("import should be best-effort, got hard error: %v", err) + } + if !*syncCalled { + t.Fatal("Sync was NOT called") + } + if *restartCalled { + t.Fatal("restartHermesRemoteSigner ran even though Sync failed") + } +} + +// errSyncFailed is a sentinel error used by tests to drive the +// helmfile-sync-failed branch of ImportPrivateKeyWalletCmd. +var errSyncFailed = &syncErr{msg: "test: sync failed"} + +type syncErr struct{ msg string } + +func (e *syncErr) Error() string { return e.msg } From 395485218bbdb5d778d4ee3ae99be02b2ab99afa Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 29 Apr 2026 16:27:48 +0800 Subject: [PATCH 5/6] =?UTF-8?q?fix(hermes):=20bump=20remote-signer=20chart?= =?UTF-8?q?=200.3.0=20=E2=86=92=200.3.1=20+=20consistency=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chart 0.3.1 was published 2026-04-23 with appVersion `v0.2.0`, which accepts the canonical-string signer contract (chain_id, value, etc. serialized as JSON strings) introduced by PR #359 / commit b9495b8. Chart 0.3.0 ships `v0.1.0` which only accepts the legacy u64 contract and rejects every signing call from current obol-stack with HTTP 422 "chain_id: invalid type: string \"84532\", expected u64". OpenClaw was bumped to 0.3.1 in PR #374 but Hermes was missed — the two charts are pinned in independent constants and Renovate only updated one. flow-14 step 23 (Alice ERC-8004 register via remote- signer) reproduced the failure on every run against current main. TestRemoteSignerChartVersionConsistency reads both source files at test time and asserts the two pins agree, so future chart bumps either touch both files together or fail CI. Pairs with: PR #357 (closed in favour of #359), task #46. --- internal/hermes/chart_consistency_test.go | 45 +++++++++++++++++++++++ internal/hermes/hermes.go | 7 +++- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 internal/hermes/chart_consistency_test.go diff --git a/internal/hermes/chart_consistency_test.go b/internal/hermes/chart_consistency_test.go new file mode 100644 index 00000000..8a23e225 --- /dev/null +++ b/internal/hermes/chart_consistency_test.go @@ -0,0 +1,45 @@ +package hermes + +import ( + "os" + "regexp" + "testing" +) + +// TestRemoteSignerChartVersionConsistency catches drift between the two +// independent pins of the shared `remote-signer` Helm chart. +// +// The same chart is referenced from internal/hermes/hermes.go and +// internal/openclaw/openclaw.go — once each, in private package constants +// — and Renovate has historically updated only one of the two when a new +// chart version landed. That left Hermes pinned to chart 0.3.0 (image +// v0.1.0, legacy u64 signer contract) while OpenClaw moved to 0.3.1 +// (image v0.2.0, canonical-string contract), which silently broke +// `obol sell register` for any flow that exercised the Hermes path +// against the post-PR-#359 client. +// +// This test is intentionally a string match against the source files so +// it works without exporting the constants or restructuring the package +// graph. If you bump one pin, bump the other in the same commit. +func TestRemoteSignerChartVersionConsistency(t *testing.T) { + hermesV := readChartPin(t, "hermes.go") + openclawV := readChartPin(t, "../openclaw/openclaw.go") + if hermesV != openclawV { + t.Fatalf("remote-signer chart pin drift:\n internal/hermes/hermes.go = %q\n internal/openclaw/openclaw.go = %q\nbump both pins in the same commit", + hermesV, openclawV) + } +} + +func readChartPin(t *testing.T, relPath string) string { + t.Helper() + raw, err := os.ReadFile(relPath) + if err != nil { + t.Fatalf("read %s: %v", relPath, err) + } + re := regexp.MustCompile(`(?m)^\s*remoteSignerChartVersion\s*=\s*"([^"]+)"`) + m := re.FindStringSubmatch(string(raw)) + if len(m) < 2 { + t.Fatalf("could not find remoteSignerChartVersion in %s", relPath) + } + return m[1] +} diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go index 876af18d..6a813b43 100644 --- a/internal/hermes/hermes.go +++ b/internal/hermes/hermes.go @@ -31,7 +31,12 @@ const ( obolSkillsDirName = "obol-skills" // renovate: datasource=helm depName=remote-signer registryUrl=https://obolnetwork.github.io/helm-charts/ - remoteSignerChartVersion = "0.3.0" + // MUST match internal/openclaw/openclaw.go's remoteSignerChartVersion — + // guarded by TestRemoteSignerChartVersionConsistency. Chart 0.3.1 ships + // remote-signer image v0.2.0 which accepts the canonical-string signer + // contract (PR #359 / commit b9495b8); 0.3.0 ships v0.1.0 which only + // accepts the legacy u64 contract and breaks `obol sell register`. + remoteSignerChartVersion = "0.3.1" // renovate: datasource=helm depName=raw registryUrl=https://bedag.github.io/helm-charts/ rawChartVersion = "2.0.2" From 54001c76775215c68a1ff9ccabe0278619c1e66c Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 29 Apr 2026 16:35:15 +0800 Subject: [PATCH 6/6] refactor(charts): single source of truth for remote-signer chart pin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both Hermes and OpenClaw deploy the same `remote-signer` Helm chart but each held its own private constant + Renovate annotation. PR #374 bumped only OpenClaw to 0.3.1; Hermes stayed on 0.3.0 and shipped image v0.1.0 which rejects the canonical-string signer contract — exactly the drift class TestRemoteSignerChartVersionConsistency was added to catch. Promote the pin to a single exported constant in `internal/agentruntime/charts.go` (the package both consumers already import for Namespace/Hostname/KeystoreVolumePath, no new dep edge), move the Renovate annotation to live alongside it, and delete the consistency test — drift is now structurally impossible. Mirrors the OPENCLAW_VERSION pattern (single source-of-truth file + TestOpenClawVersionConsistency over its three consumers); future shared chart pins follow the same shape under internal/agentruntime/. --- internal/agentruntime/charts.go | 16 ++++++++ internal/hermes/chart_consistency_test.go | 45 ----------------------- internal/hermes/hermes.go | 9 +---- internal/openclaw/openclaw.go | 6 +-- 4 files changed, 18 insertions(+), 58 deletions(-) create mode 100644 internal/agentruntime/charts.go delete mode 100644 internal/hermes/chart_consistency_test.go diff --git a/internal/agentruntime/charts.go b/internal/agentruntime/charts.go new file mode 100644 index 00000000..316a1bec --- /dev/null +++ b/internal/agentruntime/charts.go @@ -0,0 +1,16 @@ +package agentruntime + +// RemoteSignerChartVersion is the single source of truth for the +// `remote-signer` Helm chart pin used by both Hermes and OpenClaw +// deployments. It MUST be updated as a single edit; bumping it here +// updates every consumer in lockstep. +// +// Chart 0.3.1 ships remote-signer image `v0.2.0`, which accepts the +// canonical-string signer contract (chain_id, value, etc. serialized +// as JSON strings) introduced by PR #359. Chart 0.3.0 ships `v0.1.0`, +// which only accepts the legacy u64 contract and breaks `obol sell +// register` for current obol-stack with HTTP 422 "chain_id: invalid +// type: string \"84532\", expected u64". +// +// renovate: datasource=helm depName=remote-signer registryUrl=https://obolnetwork.github.io/helm-charts/ +const RemoteSignerChartVersion = "0.3.1" diff --git a/internal/hermes/chart_consistency_test.go b/internal/hermes/chart_consistency_test.go deleted file mode 100644 index 8a23e225..00000000 --- a/internal/hermes/chart_consistency_test.go +++ /dev/null @@ -1,45 +0,0 @@ -package hermes - -import ( - "os" - "regexp" - "testing" -) - -// TestRemoteSignerChartVersionConsistency catches drift between the two -// independent pins of the shared `remote-signer` Helm chart. -// -// The same chart is referenced from internal/hermes/hermes.go and -// internal/openclaw/openclaw.go — once each, in private package constants -// — and Renovate has historically updated only one of the two when a new -// chart version landed. That left Hermes pinned to chart 0.3.0 (image -// v0.1.0, legacy u64 signer contract) while OpenClaw moved to 0.3.1 -// (image v0.2.0, canonical-string contract), which silently broke -// `obol sell register` for any flow that exercised the Hermes path -// against the post-PR-#359 client. -// -// This test is intentionally a string match against the source files so -// it works without exporting the constants or restructuring the package -// graph. If you bump one pin, bump the other in the same commit. -func TestRemoteSignerChartVersionConsistency(t *testing.T) { - hermesV := readChartPin(t, "hermes.go") - openclawV := readChartPin(t, "../openclaw/openclaw.go") - if hermesV != openclawV { - t.Fatalf("remote-signer chart pin drift:\n internal/hermes/hermes.go = %q\n internal/openclaw/openclaw.go = %q\nbump both pins in the same commit", - hermesV, openclawV) - } -} - -func readChartPin(t *testing.T, relPath string) string { - t.Helper() - raw, err := os.ReadFile(relPath) - if err != nil { - t.Fatalf("read %s: %v", relPath, err) - } - re := regexp.MustCompile(`(?m)^\s*remoteSignerChartVersion\s*=\s*"([^"]+)"`) - m := re.FindStringSubmatch(string(raw)) - if len(m) < 2 { - t.Fatalf("could not find remoteSignerChartVersion in %s", relPath) - } - return m[1] -} diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go index 6a813b43..28cefe68 100644 --- a/internal/hermes/hermes.go +++ b/internal/hermes/hermes.go @@ -30,13 +30,6 @@ const ( gatewayTokenFileName = ".gateway-token" obolSkillsDirName = "obol-skills" - // renovate: datasource=helm depName=remote-signer registryUrl=https://obolnetwork.github.io/helm-charts/ - // MUST match internal/openclaw/openclaw.go's remoteSignerChartVersion — - // guarded by TestRemoteSignerChartVersionConsistency. Chart 0.3.1 ships - // remote-signer image v0.2.0 which accepts the canonical-string signer - // contract (PR #359 / commit b9495b8); 0.3.0 ships v0.1.0 which only - // accepts the legacy u64 contract and breaks `obol sell register`. - remoteSignerChartVersion = "0.3.1" // renovate: datasource=helm depName=raw registryUrl=https://bedag.github.io/helm-charts/ rawChartVersion = "2.0.2" @@ -630,7 +623,7 @@ releases: version: %s values: - values-remote-signer.yaml -`, namespace, rawChartVersion, valuesFileName, namespace, remoteSignerChartVersion) +`, namespace, rawChartVersion, valuesFileName, namespace, agentruntime.RemoteSignerChartVersion) } func dashboardHostname(id string) string { diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index eb8774e7..0ed58079 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -48,10 +48,6 @@ const ( // chartVersion pins the openclaw Helm chart version from the obol repo. // renovate: datasource=helm depName=openclaw registryUrl=https://obolnetwork.github.io/helm-charts/ chartVersion = "0.4.0" - - // remoteSignerChartVersion pins the remote-signer Helm chart version. - // renovate: datasource=helm depName=remote-signer registryUrl=https://obolnetwork.github.io/helm-charts/ - remoteSignerChartVersion = "0.3.1" ) // openclawVersionRaw is the single source of truth for the upstream OpenClaw @@ -2802,5 +2798,5 @@ releases: version: %s values: - values-remote-signer.yaml -`, id, namespace, chartVersion, namespace, remoteSignerChartVersion) +`, id, namespace, chartVersion, namespace, agentruntime.RemoteSignerChartVersion) }