From 29ab4c5a39e30971ab4f595f96b6cc7464794b67 Mon Sep 17 00:00:00 2001
From: bussyjd <jd@obol.tech>
Date: Tue, 21 Apr 2026 23:19:27 +0900
Subject: [PATCH] fix flow-11 validation harness on latest main

---
 .agents/skills/obol-stack-dev/SKILL.md |  15 ++-
 flows/flow-11-dual-stack.sh            | 146 +++++++++++++++++++------
 2 files changed, 129 insertions(+), 32 deletions(-)
diff --git a/.agents/skills/obol-stack-dev/SKILL.md b/.agents/skills/obol-stack-dev/SKILL.md
index 730af818..a103c1cd 100644
--- a/.agents/skills/obol-stack-dev/SKILL.md
+++ b/.agents/skills/obol-stack-dev/SKILL.md
@@ -2,7 +2,7 @@
 name: obol-stack-dev
 description: Obol Stack development, testing, and validation. Covers LLM routing through LiteLLM, x402 payment flow (sell/buy), BDD integration tests (Gherkin/godog), ERC-8004 registration, and obol CLI wrappers.
 metadata:
-  version: "2.0.0"
+  version: "2.0.1"
   domain: infrastructure
   triggers: obol, litellm, openclaw, inference, integration test, model routing, smart routing, LLM proxy, provider setup, x402, sell, buy, BDD, gherkin, payment, monetize
   role: specialist
@@ -115,6 +115,7 @@ obol openclaw dashboard <id>            # Open web UI
 obol kubectl get pods -n openclaw-<id>
 obol kubectl logs -n openclaw-<id> -l app.kubernetes.io/instance=openclaw
 obol kubectl port-forward -n openclaw-<id> svc/openclaw 18789:18789
+curl -fsS http://127.0.0.1:18789/healthz
 
 # --- Testing ---
 go test ./internal/openclaw/                                    # Unit tests
@@ -192,6 +193,7 @@ obol kubectl exec -i -n openclaw-<id> deploy/openclaw -c openclaw -- python3 - <
 - Set `Authorization: Bearer <token>` on all `/v1/chat/completions` requests
 - Use `obol model setup --provider <name> --api-key <key>` for cloud provider config
 - Wait for pod readiness AND HTTP readiness before sending inference requests
+- When validating live seller/buyer commerce flows, capture the registration, funding, and settlement transaction hashes and archive the receipts with the test log
 - Clean up test instances with `obol openclaw delete --force <id>` (flag BEFORE arg)
 - Set env vars for dev mode: `OBOL_DEVELOPMENT=true`, `OBOL_CONFIG_DIR`, `OBOL_BIN_DIR`, `OBOL_DATA_DIR`
 - Prefer `qwen3.5:9b` when validating the current local paid-inference route
@@ -204,6 +206,8 @@ obol kubectl exec -i -n openclaw-<id> deploy/openclaw -c openclaw -- python3 - <
 - Skip the gateway token (causes 401 Unauthorized)
 - Put `--force` flag after the argument in `obol openclaw delete` (urfave/cli v2 quirk)
 - Assume TCP connectivity means HTTP is ready (port-forward warmup race)
+- Assume `obol sell http ... --register-*` is a pure route-publish step on latest main; it now auto-registers by default when registration metadata is present
+- Combine `--no-register` with any `--register-*` flags; latest main rejects that combination
 - Use `app.kubernetes.io/instance=openclaw-<id>` for pod labels (Helm uses `openclaw`)
 - Run multiple integration tests without cleaning up between them (pod sandbox errors)
 - Delegate or accept broad "review the architecture" findings without converting them into concrete file-level checks and reproducible tests.
@@ -259,6 +263,13 @@ go test -tags integration -v -run TestIntegration_PaymentGate_FullLifecycle -tim
 go test -tags integration -v -run TestIntegration_Tunnel_SellDiscoverBuySidecar_QuotaAndBalance -timeout 30m ./internal/openclaw/
 ```
 
+### Flow-11 Notes
+
+- `./flows/flow-11-dual-stack.sh` is the validation path closest to how a human operator would drive the Alice (seller) / Bob (buyer) flow. On latest main, the seller-readiness checks that matter are `RoutePublished=True`, a live external `402`, and a resolvable `/.well-known/agent-registration.json`.
+- If you pass `--wallet` together with registration metadata to `obol sell http`, the registration signer must match that wallet. Using a mismatched private key causes automatic registration to fail.
+- For Bob's OpenClaw API, a local TCP connect is not enough; wait for `GET /healthz` over the forwarded port before sending `/v1/chat/completions`.
+- Natural-language buy flows can surface an approval challenge such as `/approve <id> allow-once` before `buy.py buy` actually runs. Unattended harnesses must either execute the buy tool directly or send the approval turn back through the same conversation.
+
 ### Known Gotchas
 
 - **ExternalName services**: Traefik Gateway API rejects ExternalName as HTTPRoute backends → 500 after valid payment. Use ClusterIP+Endpoints.
@@ -268,3 +279,5 @@ go test -tags integration -v -run TestIntegration_Tunnel_SellDiscoverBuySidecar_
 - **Projected ConfigMap refresh**: the LiteLLM pod can take ~60s to reflect updated buyer ConfigMaps in the sidecar.
 - **eRPC balance lag**: `buy.py balance` uses `eth_call` through eRPC, and the default unfinalized cache TTL is 10s. After a paid request, poll until the reported balance catches up with the on-chain delta.
 - **kubectl exec shell quoting**: NEVER use `sh -c` with `fmt.Sprintf` to embed JSON or secrets in shell commands passed via `kubectl exec`. JSON body or auth tokens containing single quotes will break the shell. Instead, pass args directly: `kubectl exec ... -- wget -qO- --post-data=<json> --header=Authorization:\ Bearer\ <key> <url>`. Each argument goes as a separate argv element, bypassing shell interpretation entirely.
+- **OpenClaw exec preflight**: prompting the agent to run `cd ... && python3 scripts/foo.py` or shell loops can be rejected by the exec preflight guard. Prefer direct interpreter invocations with absolute script paths when you need the agent to run a skill script.
+- **Quick Tunnel flakiness**: fresh `trycloudflare.com` hostnames can briefly fail DNS resolution or return Cloudflare 530/1033 pages. Before asking Bob to discover Alice, verify the tunnel hostname resolves and `/.well-known/agent-registration.json` returns JSON from outside the cluster.
diff --git a/flows/flow-11-dual-stack.sh b/flows/flow-11-dual-stack.sh
index e62f24d8..7d412be6 100755
--- a/flows/flow-11-dual-stack.sh
+++ b/flows/flow-11-dual-stack.sh
@@ -354,7 +354,8 @@ pass "Base Sepolia RPC added to eRPC (with write access)"
 step "Alice: create ServiceOffer"
 KEY_FILE=$(mktemp)
 echo "$SIGNER_KEY" > "$KEY_FILE"
-alice sell http alice-inference \
+set +e
+sell_out=$(alice sell http alice-inference \
     --wallet "$ALICE_WALLET" \
     --chain base-sepolia \
     --per-request 0.001 \
@@ -366,13 +367,20 @@ alice sell http alice-inference \
     --register-description "Integration test: local model inference via x402" \
     --register-skills natural_language_processing/text_generation \
     --register-domains technology/artificial_intelligence \
-    --private-key-file "$KEY_FILE" 2>&1 | tail -8
+    --private-key-file "$KEY_FILE" 2>&1)
+sell_rc=$?
+set -e
 rm -f "$KEY_FILE"
+echo "$sell_out" | tail -12
+if [ "$sell_rc" -ne 0 ]; then
+    fail "ServiceOffer create failed: ${sell_out:0:240}"
+    emit_metrics; exit "$sell_rc"
+fi
 pass "ServiceOffer created"
 
-poll_step_grep "Alice: ServiceOffer Ready" "True" 24 5 \
+poll_step_grep "Alice: seller route published" "True" 24 5 \
     alice kubectl get serviceoffers.obol.org alice-inference -n llm \
-        -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}'
+        -o jsonpath='{.status.conditions[?(@.type=="RoutePublished")].status}'
 
 step "Alice: tunnel URL"
 TUNNEL_URL=$(alice tunnel status 2>&1 | grep -oE 'https://[a-z0-9-]+\.trycloudflare\.com' | head -1)
@@ -387,15 +395,8 @@ poll_step_grep "Alice: 402 gate works" "402" 12 5 \
         "$TUNNEL_URL/services/alice-inference/v1/chat/completions" \
         -H "Content-Type: application/json" \
         -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"hi"}],"max_tokens":5}'
-step "Alice: ERC-8004 registration reflected in ServiceOffer"
-reg_out=$(alice sell status alice-inference -n llm 2>&1) || true
-echo "$reg_out" | tail -12
-if echo "$reg_out" | grep -q "Agent ID:"; then
-    AGENT_ID=$(echo "$reg_out" | grep 'Agent ID:' | awk '{print $3}' | head -1)
-    pass "ERC-8004 registered: Agent ID $AGENT_ID"
-else
-    fail "Registration not reflected in sell status: ${reg_out:0:200}"
-fi
+poll_step_grep "Alice: registration JSON served" "\"x402Support\"" 12 5 \
+    curl -sf --max-time 15 "$TUNNEL_URL/.well-known/agent-registration.json"
 
 # ═════════════════════════════════════════════════════════════════
 # BOOTSTRAP BOB (buyer, configurable ports)
@@ -484,20 +485,7 @@ PF_AGENT=$!
 step "Bob: OpenClaw API port-forward ready"
 pf_ready=0
 for i in $(seq 1 20); do
-    if python3 - "$BOB_AGENT_PORT" <<'PY'
-import socket
-import sys
-
-sock = socket.socket()
-sock.settimeout(1)
-try:
-    sock.connect(("127.0.0.1", int(sys.argv[1])))
-except OSError:
-    sys.exit(1)
-finally:
-    sock.close()
-PY
-    then
+    if curl -fsS --max-time 5 "http://localhost:${BOB_AGENT_PORT}/healthz" >/dev/null 2>&1; then
         pf_ready=1
         break
     fi
@@ -516,7 +504,9 @@ else
 fi
 
 step "Bob's agent: discover Alice via ERC-8004 registry"
-discover_response=$(curl -sf --max-time 300 \
+discover_body=$(mktemp)
+set +e
+discover_http=$(curl -sS --max-time 300 -o "$discover_body" -w '%{http_code}' \
     -X POST "http://localhost:${BOB_AGENT_PORT}/v1/chat/completions" \
     -H "Authorization: Bearer $BOB_TOKEN" \
     -H "Content-Type: application/json" \
@@ -528,7 +518,20 @@ discover_response=$(curl -sf --max-time 300 \
         }],
         \"max_tokens\": 4000,
 	        \"stream\": false
-	    }" 2>&1)
+	    }" 2>"$discover_body.stderr")
+discover_rc=$?
+set -e
+discover_response=$(<"$discover_body")
+discover_stderr=$(cat "$discover_body.stderr" 2>/dev/null || true)
+rm -f "$discover_body" "$discover_body.stderr"
+if [ "$discover_rc" -ne 0 ] || [ "${discover_http:-000}" -lt 200 ] || [ "${discover_http:-000}" -ge 300 ]; then  # curl transport error or non-2xx status
+    fail "Discovery request failed (curl=$discover_rc http=${discover_http:-000}): body=${discover_response:0:300} stderr=${discover_stderr:0:120}"
+    echo "  Port-forward log:"
+    tail -n 20 "$PF_AGENT_LOG" 2>/dev/null | sed 's/^/    /'  # indent the log tail under the heading
+    cleanup_pid "$PF_AGENT"
+    rm -f "$PF_AGENT_LOG"
+    emit_metrics; exit 1
+fi
 
 discover_content=$(extract_assistant_content "$discover_response" 2>/dev/null || true)
 echo "${discover_content:0:500}"
@@ -536,10 +539,15 @@ if [ -n "$discover_content" ] && [ "${#discover_content}" -gt 100 ]; then
     pass "Agent discovered Alice's service"
 else
     fail "Discovery response: ${discover_response:0:300}"
+    cleanup_pid "$PF_AGENT"
+    rm -f "$PF_AGENT_LOG"
+    emit_metrics; exit 1
 fi
 
 step "Bob's agent: buy inference from Alice"
-buy_response=$(curl -sf --max-time 300 \
+buy_body=$(mktemp)
+set +e
+buy_http=$(curl -sS --max-time 300 -o "$buy_body" -w '%{http_code}' \
     -X POST "http://localhost:${BOB_AGENT_PORT}/v1/chat/completions" \
     -H "Authorization: Bearer $BOB_TOKEN" \
     -H "Content-Type: application/json" \
@@ -552,14 +560,90 @@ buy_response=$(curl -sf --max-time 300 \
         ],
         \"max_tokens\": 4000,
 	        \"stream\": false
-	    }" 2>&1)
+	    }" 2>"$buy_body.stderr")
+buy_rc=$?
+set -e
+buy_response=$(<"$buy_body")
+buy_stderr=$(cat "$buy_body.stderr" 2>/dev/null || true)
+rm -f "$buy_body" "$buy_body.stderr"
+if [ "$buy_rc" -ne 0 ] || [ "${buy_http:-000}" -lt 200 ] || [ "${buy_http:-000}" -ge 300 ]; then  # curl transport error or non-2xx status
+    fail "Buy request failed (curl=$buy_rc http=${buy_http:-000}): body=${buy_response:0:300} stderr=${buy_stderr:0:120}"
+    echo "  Port-forward log:"
+    tail -n 20 "$PF_AGENT_LOG" 2>/dev/null | sed 's/^/    /'  # indent the log tail under the heading
+    cleanup_pid "$PF_AGENT"
+    rm -f "$PF_AGENT_LOG"
+    emit_metrics; exit 1
+fi
 
 buy_content=$(extract_assistant_content "$buy_response" 2>/dev/null || true)
 echo "${buy_content:0:500}"
+approve_cmd=$(printf '%s\n' "$buy_content" | grep -oE '/approve [A-Za-z0-9-]+ allow-once' | head -1 || true)
+if [ -n "$approve_cmd" ]; then
+    echo "  Approving pending buy command: $approve_cmd"
+    approve_payload=$(mktemp)
+    BUY_ASSISTANT_CONTENT="$buy_content" BUY_APPROVE_CMD="$approve_cmd" BUY_ENDPOINT="$TUNNEL_URL/services/alice-inference" python3 - <<'PY' >"$approve_payload"
+import json
+import os
+
+messages = [
+    {
+        "role": "user",
+        "content": "Search the ERC-8004 registry on Base Sepolia for the agent named 'Dual-Stack Test Inference'. Report its endpoint.",
+    },
+    {
+        "role": "assistant",
+        "content": f"I found the agent. Its endpoint is {os.environ['BUY_ENDPOINT']}",
+    },
+    {
+        "role": "user",
+        "content": (
+            "Now use the buy-inference skill to buy 5 inference tokens from Alice. "
+            f"Run exactly: python3 scripts/buy.py buy alice-inference --endpoint "
+            f"{os.environ['BUY_ENDPOINT']}/v1/chat/completions --model qwen3.5:9b --count 5"
+        ),
+    },
+    {
+        "role": "assistant",
+        "content": os.environ["BUY_ASSISTANT_CONTENT"],
+    },
+    {
+        "role": "user",
+        "content": os.environ["BUY_APPROVE_CMD"],
+    },
+]
+print(json.dumps({"model": "openclaw", "messages": messages, "max_tokens": 4000, "stream": False}))
+PY
+    approve_body=$(mktemp)
+    set +e
+    approve_http=$(curl -sS --max-time 300 -o "$approve_body" -w '%{http_code}' \
+        -X POST "http://localhost:${BOB_AGENT_PORT}/v1/chat/completions" \
+        -H "Authorization: Bearer $BOB_TOKEN" \
+        -H "Content-Type: application/json" \
+        --data @"$approve_payload" 2>"$approve_body.stderr")
+    approve_rc=$?
+    set -e
+    approve_response=$(<"$approve_body")
+    approve_stderr=$(cat "$approve_body.stderr" 2>/dev/null || true)
+    rm -f "$approve_payload" "$approve_body" "$approve_body.stderr"
+    if [ "$approve_rc" -ne 0 ] || [ "${approve_http:-000}" -lt 200 ] || [ "${approve_http:-000}" -ge 300 ]; then  # curl transport error or non-2xx status
+        fail "Buy approval request failed (curl=$approve_rc http=${approve_http:-000}): body=${approve_response:0:300} stderr=${approve_stderr:0:120}"
+        echo "  Port-forward log:"
+        tail -n 20 "$PF_AGENT_LOG" 2>/dev/null | sed 's/^/    /'  # indent the log tail under the heading
+        cleanup_pid "$PF_AGENT"
+        rm -f "$PF_AGENT_LOG"
+        emit_metrics; exit 1
+    fi
+    buy_response=$approve_response
+    buy_content=$(extract_assistant_content "$buy_response" 2>/dev/null || true)
+    echo "${buy_content:0:500}"
+fi
 if [ -n "$buy_content" ] && [ "${#buy_content}" -gt 100 ]; then
     pass "Agent bought Alice's inference"
 else
     fail "Buy response: ${buy_response:0:300}"
+    cleanup_pid "$PF_AGENT"
+    rm -f "$PF_AGENT_LOG"
+    emit_metrics; exit 1
 fi
 
 poll_step_grep "Bob: PurchaseRequest Ready" "True" 24 5 purchase_request_status