diff --git a/internal/agentcrd/agent_contract_integration_test.go b/internal/agentcrd/agent_contract_integration_test.go index 146b28a2..66e097ea 100644 --- a/internal/agentcrd/agent_contract_integration_test.go +++ b/internal/agentcrd/agent_contract_integration_test.go @@ -40,8 +40,8 @@ import ( // (agentcrd.HostNoBundledSkillsMarkerPath), so Hermes' installer/sync // skips seeding its ~80 bundled skills; // (2) the rendered hermes-config ConfigMap in the agent's namespace carries -// the capped knobs: lifetime_seconds: 90, max_turns: 30, -// reasoning_effort: low, and disabled_toolsets {memory, web}; +// the capped knobs: lifetime_seconds: 180, max_turns: 30, +// reasoning_effort: low, and disabled_toolsets {memory, web, code_execution}; // (3) a BEHAVIORAL signal that bundled skills were actually skipped — see // assertBundledSkillsSkippedInPod for why we assert pod filesystem state // rather than grep a log line. @@ -233,7 +233,7 @@ func getHermesConfigYAML(t *testing.T, cfg *config.Config, ns string) string { func assertHermesConfigCaps(t *testing.T, cfgYAML string) { t.Helper() for _, want := range []string{ - "lifetime_seconds: 90", + "lifetime_seconds: 180", "max_turns: 30", "reasoning_effort: low", "disabled_toolsets:", diff --git a/internal/embed/infrastructure/base/templates/agent-crd.yaml b/internal/embed/infrastructure/base/templates/agent-crd.yaml index 8338c0d6..3b0f578b 100644 --- a/internal/embed/infrastructure/base/templates/agent-crd.yaml +++ b/internal/embed/infrastructure/base/templates/agent-crd.yaml @@ -64,6 +64,41 @@ spec: type: object spec: properties: + mcpServers: + description: |- + MCPServers registers native MCP servers in the agent's Hermes + config (mcp_servers:). Hermes discovers each server's tools and + exposes them as first-class tools. stdio (command+args) for a + local server, or url for a remote one. + items: + properties: + args: + items: + type: string + maxItems: 64 + type: array + command: + maxLength: 512 + type: string + env: + additionalProperties: + type: string + type: object + name: + maxLength: 64 + pattern: ^[a-z0-9][a-z0-9-]*$ + type: string + transport: + maxLength: 16 + type: string + url: + maxLength: 512 + type: string + required: + - name + type: object + maxItems: 32 + type: array model: description: |- LiteLLM model name to pin. Empty = controller picks cluster diff --git a/internal/embed/skills/buy-x402/SKILL.md b/internal/embed/skills/buy-x402/SKILL.md index a3e02a5d..e17c0fae 100644 --- a/internal/embed/skills/buy-x402/SKILL.md +++ b/internal/embed/skills/buy-x402/SKILL.md @@ -9,7 +9,7 @@ metadata: { "openclaw": { "emoji": "\ud83d\uded2", "requires": { "bins": ["pytho Purchase access to remote x402-gated services. There are two flows, picked by usage shape: - **`pay `** — single-shot. Probe the URL, sign **one** payment authorization, attach `X-PAYMENT`, send the request, return the response. Stateless. Use for `type:http` services and any one-off purchase. Max loss = price of one request. Settlement normally lands only after the request succeeds — but a facilitator can submit the settle tx on-chain and *then* fail the request. When that happens the failure report prints `⚠️ SETTLEMENT MAY HAVE COMPLETED ON-CHAIN` with the tx hash: verify with `balance --chain ` before retrying (mechanism: docs/observability.md, "Verify settlement against the chain"). Applies to `pay-agent` too. -- **`pay-agent --model `** — single-shot paid **streaming** agent call. Same payment shape as `pay` (one auth, X-PAYMENT, max-loss = price), but POSTs to `/v1/chat/completions` with `stream: true` and forwards every SSE event verbatim to stdout as it arrives. Use this for `type:agent` ServiceOffers when the calling agent wants to consume the response *itself* (memory, tool-call traces, partial results) instead of routing it through LiteLLM as a paid alias. Default HTTP read timeout is **1 hour** — agent calls can legitimately run for many minutes; override with `--timeout `. +- **`pay-agent `** — single-shot paid **streaming** agent call. Same payment shape as `pay` (one auth, X-PAYMENT, max-loss = price), but POSTs to `/v1/chat/completions` with `stream: true` and forwards every SSE event verbatim to stdout as it arrives. No `--model`: a `type:agent` offer runs its own model (the request `model` field is ignored), so you only send a prompt. Use this for `type:agent` ServiceOffers when the calling agent wants to consume the response *itself* (memory, tool-call traces, partial results) instead of routing it through LiteLLM as a paid alias. Default HTTP read timeout is **1 hour** — agent calls can legitimately run for many minutes; override with `--timeout `. - **`buy `** — pre-authorize a budget. Sign **N** authorizations up front (the buyer pays nothing yet), declare them in a `PurchaseRequest` CR, let the `x402-buyer` sidecar redeem them transparently as the agent calls the model through LiteLLM at `paid/`. Use for long-running paid inference. Max loss = N × price (only as vouchers are spent); runtime path holds zero signer access. - **`buy --model --set-default`** — same as `buy` above, then adopt `paid/` as the agent's **own primary model**, in-pod, by itself: an atomic `hermes config set model.default` that Hermes re-reads per request (effective next chat turn, **no restart**, no host-side `obol model prefer`/`obol model sync`). Refuses if the model isn't selectable in LiteLLM. Pair with `--auto-refill` so the primary model doesn't brick when the pre-authorized budget runs out. @@ -134,7 +134,7 @@ python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py pay h # One-shot paid STREAMING agent call (SSE events flushed to stdout as they arrive) python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py pay-agent \ https://seller.example.com/services/demo-quant \ - --model qwen3.5:9b --message 'summarize the latest research on staking' + --message 'summarize the latest research on staking' # Pay-agent with a full OpenAI-compatible body (stream:true is forced on) python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py pay-agent \ @@ -187,7 +187,7 @@ python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py maint |---------|-------------| | `probe [--model ] [--type http\|inference\|agent] [--method GET\|POST]` | Send request without payment, parse 402 response for pricing | | `pay [--type http\|inference] [--method GET\|POST] [--data ]` | Single-shot paid request: sign 1 auth, attach X-PAYMENT, send | -| `pay-agent --model [--message \| --data ] [--timeout ]` | Single-shot paid streaming agent call: SSE events flush to stdout as they arrive (default timeout 1h) | +| `pay-agent [--message \| --data ] [--timeout ]` | Single-shot paid streaming agent call (no `--model` — the agent runs its own): SSE events flush to stdout as they arrive (default timeout 1h) | | `buy --endpoint --model [--budget N] [--count N]` | Pre-sign auths, create/update `PurchaseRequest`, expose `paid/` | | `buy --endpoint --model --set-default [--auto-refill]` | As above, then set `paid/` as the agent's own primary model in-pod (no restart, no host CLI) | | `process \| --all` | Reconcile `autoRefill` policies against live `x402-buyer` status | diff --git a/internal/embed/skills/buy-x402/scripts/buy.py b/internal/embed/skills/buy-x402/scripts/buy.py index caf4cbc5..a9541f06 100644 --- a/internal/embed/skills/buy-x402/scripts/buy.py +++ b/internal/embed/skills/buy-x402/scripts/buy.py @@ -2372,7 +2372,7 @@ def cmd_pay(url, method="GET", data=None, kind="http", network=None, timeout=Non sys.exit(1) -def cmd_pay_agent(url, messages=None, model_id=None, network=None, timeout=None, body=None, token=None, payment_option=None): +def cmd_pay_agent(url, messages=None, network=None, timeout=None, body=None, token=None, payment_option=None): """Single-shot paid streaming agent call: probe -> sign one auth -> SSE-stream. Sibling of `cmd_pay` for `type=agent` ServiceOffers. Differences from @@ -2392,9 +2392,10 @@ def cmd_pay_agent(url, messages=None, model_id=None, network=None, timeout=None, alias. `body` is an optional JSON-encoded request body. When omitted, `messages` - + `model_id` are required and a `{model, messages, stream:true}` body is - synthesized. When provided, the body is parsed and `"stream": true` is - forced onto whatever the caller passed. + is required and a `{messages, stream:true}` body is synthesized — NO `model` + field: a type=agent offer runs its own model and ignores any `model` sent. + When provided, the body is parsed and `"stream": true` is forced onto + whatever the caller passed. """ if timeout is None or float(timeout) <= 0: timeout = 3600.0 @@ -2414,27 +2415,24 @@ def cmd_pay_agent(url, messages=None, model_id=None, network=None, timeout=None, # Force streaming on. cmd_pay handles non-streaming; cmd_pay_agent # exists precisely to stream. parsed_body["stream"] = True - if model_id and not parsed_body.get("model"): - parsed_body["model"] = model_id else: if not messages: print( "Error: --message (or --data ) is required for `pay-agent`.\n" - "Example: pay-agent --model qwen3.5:9b --message 'summarize the docs'", + "Example: pay-agent --message 'summarize the docs'", file=sys.stderr, ) sys.exit(1) - if not model_id: - print("Error: --model is required when using --message.", file=sys.stderr) - sys.exit(1) + # type=agent ServiceOffers run their own model — there is nothing to + # select and the agent ignores any `model` field — so pay-agent sends + # only the prompt. parsed_body = { - "model": model_id, "messages": [{"role": "user", "content": messages}], "stream": True, } print(f"Probing {url} ...") - pricing = _probe_endpoint(url, model_id=model_id or "test", kind="inference") + pricing = _probe_endpoint(url, model_id="probe", kind="inference") if not pricing: print("Failed to get x402 pricing.", file=sys.stderr) sys.exit(1) @@ -2708,7 +2706,7 @@ def usage(): print(" Single-shot paid request (sign 1 auth, attach X-PAYMENT)") print(" Multi-currency offers: pick which asset/price to pay with") print(" --token/--network/--payment-option (probe to see options)") - print(" pay-agent --model [--message '' | --data ''] [--timeout ]") + print(" pay-agent [--message '' | --data ''] [--timeout ]") print(" [--token ] [--network ] [--payment-option ]") print(" Single-shot paid streaming agent call (POST /v1/chat/completions,") print(" stream: true). Each SSE event flushes to stdout so a calling") @@ -2788,7 +2786,7 @@ def usage(): positional, opts = parse_flags(rest) if not positional: print( - "Usage: pay-agent --model [--message '' | --data ''] " + "Usage: pay-agent [--message '' | --data ''] " "[--network ] [--timeout ]", file=sys.stderr, ) @@ -2805,7 +2803,6 @@ def usage(): cmd_pay_agent( positional[0], messages=opts.get("message"), - model_id=opts.get("model"), network=opts.get("network"), timeout=timeout, body=opts.get("data"), diff --git a/internal/monetizeapi/types.go b/internal/monetizeapi/types.go index e6161093..8bda6af7 100644 --- a/internal/monetizeapi/types.go +++ b/internal/monetizeapi/types.go @@ -686,6 +686,13 @@ type AgentSpec struct { // +kubebuilder:validation:MaxLength=4096 Objective string `json:"objective,omitempty"` Wallet AgentWallet `json:"wallet,omitempty"` + // MCPServers registers native MCP servers in the agent's Hermes config + // (mcp_servers:). Hermes discovers each server's tools and exposes them as + // first-class tools — the harness serializes the args, so the model never + // hand-builds JSON-in-shell. Use a stdio server (command+args) for a local, + // payment-abstracting wrapper, or url for a remote one. + // +kubebuilder:validation:MaxItems=32 + MCPServers []AgentMCPServer `json:"mcpServers,omitempty"` } type AgentWallet struct { @@ -695,6 +702,33 @@ type AgentWallet struct { Create bool `json:"create,omitempty"` } +// AgentMCPServer is one entry under Hermes' mcp_servers: config. stdio +// (Command+Args) spawns a local MCP server; URL (+Transport "sse") connects to +// a remote one. Env values may use ${VAR}, which Hermes interpolates from the +// pod environment at load — keep raw secrets out of the CR (reference them as +// ${REMOTE_SIGNER_TOKEN} etc.; Hermes filters the stdio subprocess env, so +// anything the server needs must be listed here). +type AgentMCPServer struct { + // Key under mcp_servers (e.g. "hyperliquid"). + // +kubebuilder:validation:Pattern=`^[a-z0-9][a-z0-9-]*$` + // +kubebuilder:validation:MaxLength=64 + Name string `json:"name"` + // stdio transport: executable to spawn. + // +kubebuilder:validation:MaxLength=512 + Command string `json:"command,omitempty"` + // stdio transport: arguments for Command. + // +kubebuilder:validation:MaxItems=64 + Args []string `json:"args,omitempty"` + // http/sse transport: remote MCP server URL. + // +kubebuilder:validation:MaxLength=512 + URL string `json:"url,omitempty"` + // Transport override ("sse"); default is Streamable HTTP for a url server. + // +kubebuilder:validation:MaxLength=16 + Transport string `json:"transport,omitempty"` + // Environment for a stdio server. Values may use ${VAR} interpolation. + Env map[string]string `json:"env,omitempty"` +} + type AgentStatus struct { ObservedGeneration int64 `json:"observedGeneration,omitempty"` // Pending | Provisioning | Ready | Failed @@ -765,8 +799,7 @@ type AgentIdentityList struct { Items []AgentIdentity `json:"items"` } -type AgentIdentitySpec struct { -} +type AgentIdentitySpec struct{} type AgentIdentityStatus struct { // Per-chain ERC-8004 registrations for this identity document. diff --git a/internal/monetizeapi/zz_generated.deepcopy.go b/internal/monetizeapi/zz_generated.deepcopy.go index ffc01b89..1e26e118 100644 --- a/internal/monetizeapi/zz_generated.deepcopy.go +++ b/internal/monetizeapi/zz_generated.deepcopy.go @@ -188,6 +188,40 @@ func (in *AgentSpec) DeepCopyInto(out *AgentSpec) { copy(*out, *in) } out.Wallet = in.Wallet + if in.MCPServers != nil { + in, out := &in.MCPServers, &out.MCPServers + *out = make([]AgentMCPServer, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AgentMCPServer) DeepCopyInto(out *AgentMCPServer) { + *out = *in + if in.Args != nil { + in, out := &in.Args, &out.Args + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AgentMCPServer. +func (in *AgentMCPServer) DeepCopy() *AgentMCPServer { + if in == nil { + return nil + } + out := new(AgentMCPServer) + in.DeepCopyInto(out) + return out } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AgentSpec. diff --git a/internal/serviceoffercontroller/agent_render.go b/internal/serviceoffercontroller/agent_render.go index e1ade18a..90785a42 100644 --- a/internal/serviceoffercontroller/agent_render.go +++ b/internal/serviceoffercontroller/agent_render.go @@ -5,6 +5,8 @@ import ( "crypto/sha256" "encoding/hex" "fmt" + "sort" + "strings" "github.com/ObolNetwork/obol-stack/internal/monetizeapi" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -71,7 +73,7 @@ func agentManifests(agent *monetizeapi.Agent, litellmKey, apiKey string) ([]*uns return nil, fmt.Errorf("agentManifests: agent has no resolved model") } - configYAML := renderHermesConfig(model, litellmKey) + configYAML := renderHermesConfig(model, litellmKey, agent.Spec.MCPServers) out := []*unstructured.Unstructured{ buildAgentNamespace(agent.Namespace), @@ -94,13 +96,26 @@ func agentManifests(agent *monetizeapi.Agent, litellmKey, apiKey string) ([]*uns // // Sub-agent constraints: every Agent CR is a sub-agent-for-sale (the // master is deployed via `obol agent init`, not via ServiceOffer), so the -// terminal/agent caps below apply unconditionally. The Cloudflare free -// tunnel cuts off requests at 100s, so lifetime_seconds is bounded under -// that. terminal.timeout must stay <= lifetime_seconds so no single -// operation can outlive the session. max_turns and reasoning_effort cap -// chattiness, and disabled_toolsets drops Hermes tool families that aren't -// useful in a paid-service context (memory persistence, web search). -func renderHermesConfig(model, litellmKey string) string { +// terminal/agent caps below apply unconditionally. Sold agents run behind a +// named Cloudflare tunnel (no ~100s quick-tunnel idle cut), and a single paid +// data call can legitimately be slow (an x402 payment round-trip plus a +// first-party data query), so terminal.timeout/lifetime_seconds carry real +// headroom rather than the old 80s/90s that timed out heavier queries. +// terminal.timeout must stay <= lifetime_seconds so no single operation can +// outlive the session. max_turns and reasoning_effort cap chattiness, and +// disabled_toolsets drops Hermes tool families that aren't useful in a +// paid-service context (memory persistence, web search). +// +// code_execution (the `execute_code` tool) is disabled too: it runs arbitrary +// in-process Python whose subprocess/file calls bypass the terminal +// DANGEROUS_PATTERNS gate, so Hermes requires a per-script approval that no +// human can grant during an unattended paid turn — the tool just fails closed +// and small models loop on it (observed: gemma4 retrying execute_code until +// the turn dies). Skills that shell out (e.g. buy-x402, the hyperliquid +// data skill) run their `python3 .../foo.py` via the `terminal` tool instead, +// where a benign script auto-approves and genuinely dangerous commands stay +// gated — granular, not a blanket --yolo bypass. +func renderHermesConfig(model, litellmKey string, mcpServers []monetizeapi.AgentMCPServer) string { return fmt.Sprintf(`model: default: %q provider: custom @@ -109,8 +124,8 @@ func renderHermesConfig(model, litellmKey string) string { terminal: backend: local cwd: /data/.hermes/workspace - timeout: 80 - lifetime_seconds: 90 + timeout: 170 + lifetime_seconds: 180 docker_mount_cwd_to_workspace: false agent: max_turns: 30 @@ -118,10 +133,60 @@ agent: disabled_toolsets: - memory - web + - code_execution skills: external_dirs: - /data/.hermes/obol-skills -`, model, litellmKey) +`, model, litellmKey) + renderMCPServersBlock(mcpServers) +} + +// renderMCPServersBlock renders the optional `mcp_servers:` section from an +// Agent's MCPServers. Hermes (tools/mcp_tool.py) connects to each server, +// discovers its tools, and registers them as first-class tools — so the model +// calls them with harness-serialized args instead of hand-built JSON-in-shell. +// Empty -> "" (no section). Env values are emitted verbatim and may carry +// ${VAR} placeholders that Hermes interpolates from the pod env at load (the +// stdio subprocess env is otherwise filtered, so list everything the server +// needs, e.g. ${REMOTE_SIGNER_TOKEN}). Keys are sorted for deterministic output. +func renderMCPServersBlock(servers []monetizeapi.AgentMCPServer) string { + if len(servers) == 0 { + return "" + } + var b strings.Builder + b.WriteString("mcp_servers:\n") + for _, s := range servers { + if s.Name == "" { + continue + } + fmt.Fprintf(&b, " %s:\n", s.Name) + if s.Command != "" { + fmt.Fprintf(&b, " command: %q\n", s.Command) + } + if len(s.Args) > 0 { + b.WriteString(" args:\n") + for _, a := range s.Args { + fmt.Fprintf(&b, " - %q\n", a) + } + } + if s.URL != "" { + fmt.Fprintf(&b, " url: %q\n", s.URL) + } + if s.Transport != "" { + fmt.Fprintf(&b, " transport: %q\n", s.Transport) + } + if len(s.Env) > 0 { + b.WriteString(" env:\n") + keys := make([]string, 0, len(s.Env)) + for k := range s.Env { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + fmt.Fprintf(&b, " %s: %q\n", k, s.Env[k]) + } + } + } + return b.String() } func buildAgentNamespace(ns string) *unstructured.Unstructured { diff --git a/internal/serviceoffercontroller/agent_render_test.go b/internal/serviceoffercontroller/agent_render_test.go index dfc227c5..54a723aa 100644 --- a/internal/serviceoffercontroller/agent_render_test.go +++ b/internal/serviceoffercontroller/agent_render_test.go @@ -345,7 +345,7 @@ func agentConfigChecksum(t *testing.T, agent *monetizeapi.Agent) string { } func TestRenderHermesConfig_HasModelAndSkillsDir(t *testing.T) { - cfg := renderHermesConfig("qwen3.5:9b", "lit-key") + cfg := renderHermesConfig("qwen3.5:9b", "lit-key", nil) for _, must := range []string{ `default: "qwen3.5:9b"`, `api_key: "lit-key"`, @@ -363,15 +363,18 @@ func TestRenderHermesConfig_HasModelAndSkillsDir(t *testing.T) { // knobs so a single sale stays inside the 100s Cloudflare free-tunnel // window. If any of these drift it should fail loudly. func TestRenderHermesConfig_SubAgentConstraints(t *testing.T) { - cfg := renderHermesConfig("qwen3.5:9b", "lit-key") + cfg := renderHermesConfig("qwen3.5:9b", "lit-key", nil) for _, must := range []string{ - `timeout: 80`, - `lifetime_seconds: 90`, + `timeout: 170`, + `lifetime_seconds: 180`, `max_turns: 30`, `reasoning_effort: low`, `disabled_toolsets:`, `- memory`, `- web`, + // execute_code is blocked in unattended gateway turns (needs a human + // approval no one can grant); skills must shell out via `terminal`. + `- code_execution`, } { if !strings.Contains(cfg, must) { t.Errorf("hermes config missing sub-agent constraint %q\n---\n%s", must, cfg) @@ -389,6 +392,43 @@ func TestRenderHermesConfig_SubAgentConstraints(t *testing.T) { } } +// Per-agent mcp_servers render as a native Hermes MCP-server block. Empty -> no +// section; a stdio server renders command/args/env, env values verbatim (so +// ${VAR} placeholders survive for Hermes to interpolate from the pod env). +func TestRenderHermesConfig_MCPServers(t *testing.T) { + if got := renderHermesConfig("m", "k", nil); strings.Contains(got, "mcp_servers:") { + t.Errorf("empty MCPServers must not emit mcp_servers:\n%s", got) + } + servers := []monetizeapi.AgentMCPServer{{ + Name: "hyperliquid", + Command: "/opt/hermes/.venv/bin/python3", + Args: []string{"/data/.hermes/obol-skills/hyperliquid-intelligence/scripts/hl_mcp.py"}, + Env: map[string]string{ + "OBOL_SKILLS_DIR": "/data/.hermes/obol-skills", + "REMOTE_SIGNER_TOKEN": "${REMOTE_SIGNER_TOKEN}", + }, + }} + cfg := renderHermesConfig("m", "k", servers) + for _, must := range []string{ + "mcp_servers:", + " hyperliquid:", + ` command: "/opt/hermes/.venv/bin/python3"`, + " args:", + ` - "/data/.hermes/obol-skills/hyperliquid-intelligence/scripts/hl_mcp.py"`, + " env:", + ` OBOL_SKILLS_DIR: "/data/.hermes/obol-skills"`, + ` REMOTE_SIGNER_TOKEN: "${REMOTE_SIGNER_TOKEN}"`, + } { + if !strings.Contains(cfg, must) { + t.Errorf("mcp_servers render missing %q\n---\n%s", must, cfg) + } + } + // Env keys are sorted for deterministic output. + if strings.Index(cfg, "OBOL_SKILLS_DIR") > strings.Index(cfg, "REMOTE_SIGNER_TOKEN") { + t.Errorf("env keys not deterministically sorted\n%s", cfg) + } +} + // parseTerminalInt extracts the integer value of a `key: ` line from the // rendered Hermes config. Fails the test if the key is absent or unparsable. func parseTerminalInt(t *testing.T, cfg, key string) int { diff --git a/internal/serviceoffercontroller/render.go b/internal/serviceoffercontroller/render.go index 8481b077..1020ce90 100644 --- a/internal/serviceoffercontroller/render.go +++ b/internal/serviceoffercontroller/render.go @@ -265,7 +265,13 @@ func buildSkillCatalogConfigMap(content, servicesJSON, openAPIJSON, apiDocsHTML "services.json": servicesJSON, "openapi.json": openAPIJSON, "api.html": apiDocsHTML, - "httpd.conf": ".md:text/markdown\n.json:application/json\n.html:text/html\n", + // charset=utf-8 on the text types so UTF-8 content (em dashes + // in the catalog, accented operator descriptions, …) renders + // correctly instead of mojibake — busybox httpd otherwise sends + // a bare text/* type and clients fall back to Latin-1/CP1252. + // JSON is always UTF-8 by spec (RFC 8259), so it carries no + // charset param. + "httpd.conf": ".md:text/markdown; charset=utf-8\n.json:application/json\n.html:text/html; charset=utf-8\n", }, }, } @@ -899,6 +905,18 @@ func offerPublishedForRegistration(offer *monetizeapi.ServiceOffer) bool { isConditionTrue(offer.Status, "RoutePublished") } +// catalogModelName returns the model id to surface in the catalog, or "" to +// omit it. Agent offers run their own model and ignore the request `model` +// field, so the id is an internal detail and is never surfaced — mirrors the +// 402 page / extra / bazaar model-strip in internal/x402. Inference (and other +// model-bearing) offers keep their id, since there the buyer selects it. +func catalogModelName(offer *monetizeapi.ServiceOffer) string { + if offer.IsAgent() { + return "" + } + return offer.Spec.Model.Name +} + func buildSkillCatalogMarkdown(offers []*monetizeapi.ServiceOffer, baseURL string) string { baseURL = strings.TrimRight(baseURL, "/") @@ -955,7 +973,7 @@ func buildSkillCatalogMarkdown(offers []*monetizeapi.ServiceOffer, baseURL strin lines = append(lines, "| Service | Type | Model | Pay with | Status | Endpoint |") lines = append(lines, "|---------|------|-------|----------|--------|----------|") for _, offer := range ready { - modelName := offer.Spec.Model.Name + modelName := catalogModelName(offer) if modelName == "" { modelName = "—" } @@ -977,7 +995,7 @@ func buildSkillCatalogMarkdown(offers []*monetizeapi.ServiceOffer, baseURL strin } lines = append(lines, "", "## Service Details", "") for _, offer := range ready { - modelName := offer.Spec.Model.Name + modelName := catalogModelName(offer) endpoint := baseURL + offer.EffectivePath() lines = append(lines, fmt.Sprintf("### %s", offer.Name)) lines = append(lines, fmt.Sprintf("- **Endpoint**: `%s`", endpoint)) @@ -1164,13 +1182,12 @@ func buildServiceCatalogJSON(offers []*monetizeapi.ServiceOffer, baseURL string) if desc == "" { desc = fmt.Sprintf("x402 payment-gated %s service", fallbackOfferType(offer)) } - // type=agent offers leave spec.model empty by design (the model - // lives on the linked Agent). Fall back to the controller's - // resolved view so the storefront can display it. - modelName := offer.Spec.Model.Name - if modelName == "" && offer.Status.AgentResolution != nil { - modelName = offer.Status.AgentResolution.Model - } + // Agent offers run their own model and ignore the request `model` + // field, so the id is an internal detail and is never surfaced in + // the catalog — mirrors skill.md, the 402 page / extra, and the + // bazaar example. Inference (and other model-bearing) offers keep + // their id, since there the buyer selects it. + modelName := catalogModelName(offer) drainEndsAt := "" if offer.IsDraining() { diff --git a/internal/serviceoffercontroller/render_builders_test.go b/internal/serviceoffercontroller/render_builders_test.go index 5c3bf105..9b3e14e0 100644 --- a/internal/serviceoffercontroller/render_builders_test.go +++ b/internal/serviceoffercontroller/render_builders_test.go @@ -127,6 +127,11 @@ func TestBuildSkillCatalogConfigMap(t *testing.T) { if conf, _ := data["httpd.conf"].(string); !strings.Contains(conf, ".md:text/markdown") || !strings.Contains(conf, ".json:application/json") || !strings.Contains(conf, ".html:text/html") { t.Errorf("httpd.conf missing required mime mappings: %q", conf) } + // Text types must declare charset=utf-8 or UTF-8 content (em dashes, + // accented descriptions) renders as Latin-1 mojibake. + if conf, _ := data["httpd.conf"].(string); !strings.Contains(conf, ".md:text/markdown; charset=utf-8") || !strings.Contains(conf, ".html:text/html; charset=utf-8") { + t.Errorf("httpd.conf text types missing charset=utf-8: %q", conf) + } // Managed-by label so the controller owns cleanup on uninstall. lbls, _ := cm.Object["metadata"].(map[string]any)["labels"].(map[string]any) if lbls["obol.org/managed-by"] != "serviceoffer-controller" { diff --git a/internal/serviceoffercontroller/render_test.go b/internal/serviceoffercontroller/render_test.go index c903a338..7be3df61 100644 --- a/internal/serviceoffercontroller/render_test.go +++ b/internal/serviceoffercontroller/render_test.go @@ -706,6 +706,55 @@ func TestBuildSkillCatalogMarkdown_DrainAdditiveDetail(t *testing.T) { } } +// TestBuildSkillCatalogMarkdown_AgentModelStripped locks in that agent offers +// never surface their underlying model in the catalog (the agent runs its own +// model and ignores the request `model` field — it's an internal detail), while +// inference offers keep it (there the buyer selects the model). Mirrors the +// 402 page / extra / bazaar model-strip in internal/x402. +func TestBuildSkillCatalogMarkdown_AgentModelStripped(t *testing.T) { + readyCond := []monetizeapi.Condition{{Type: "Ready", Status: "True"}} + agentOffer := &monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "analyst", Namespace: "agent-analyst"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "agent", + Model: monetizeapi.ServiceOfferModel{Name: "gemma4-aeon-uncensored"}, + Payment: monetizeapi.ServiceOfferPayment{ + Network: "base-sepolia", + PayTo: "0x1111111111111111111111111111111111111111", + Price: monetizeapi.ServiceOfferPriceTable{PerRequest: "0.01"}, + }, + }, + Status: monetizeapi.ServiceOfferStatus{Conditions: readyCond}, + } + inferenceOffer := &monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "raw-llm", Namespace: "llm"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "inference", + Model: monetizeapi.ServiceOfferModel{Name: "qwen36-deep"}, + Payment: monetizeapi.ServiceOfferPayment{ + Network: "base-sepolia", + PayTo: "0x2222222222222222222222222222222222222222", + Price: monetizeapi.ServiceOfferPriceTable{PerRequest: "0.001"}, + }, + }, + Status: monetizeapi.ServiceOfferStatus{Conditions: readyCond}, + } + + content := buildSkillCatalogMarkdown( + []*monetizeapi.ServiceOffer{agentOffer, inferenceOffer}, + "https://example.com", + ) + + // Agent: model never appears (table column is "—", no **Model** detail). + if strings.Contains(content, "gemma4-aeon-uncensored") { + t.Errorf("agent offer leaked its internal model into the catalog:\n%s", content) + } + // Inference: model is buyer-facing and must stay (table + detail bullet). + if !strings.Contains(content, "- **Model**: qwen36-deep") { + t.Errorf("inference offer dropped its (buyer-selectable) model bullet:\n%s", content) + } +} + func TestBuildSkillCatalogHTTPRoute(t *testing.T) { route := buildSkillCatalogHTTPRoute() if route.GetName() != skillCatalogRouteName { @@ -883,7 +932,7 @@ func TestBuildServiceCatalogJSON_Empty(t *testing.T) { } } -func TestBuildServiceCatalogJSON_AgentOfferUsesResolvedModel(t *testing.T) { +func TestBuildServiceCatalogJSON_AgentOfferOmitsInternalModel(t *testing.T) { offer := &monetizeapi.ServiceOffer{ ObjectMeta: metav1.ObjectMeta{Name: "demo-quant", Namespace: "agent-demo-quant"}, Spec: monetizeapi.ServiceOfferSpec{ @@ -928,8 +977,14 @@ func TestBuildServiceCatalogJSON_AgentOfferUsesResolvedModel(t *testing.T) { if svc.Type != "agent" { t.Errorf("type = %q, want agent", svc.Type) } - if svc.Model != "qwen3.5:9b" { - t.Errorf("model = %q, want qwen3.5:9b", svc.Model) + // An agent runs its own model and ignores the request `model` field, so + // the id is an internal detail and must never be surfaced in the catalog + // (mirrors skill.md, the 402 page/extra, and the bazaar example). + if svc.Model != "" { + t.Errorf("model = %q, want empty (internal model must not leak for agent offers)", svc.Model) + } + if strings.Contains(jsonStr, "qwen3.5:9b") { + t.Errorf("internal model leaked into catalog JSON:\n%s", jsonStr) } if svc.Price != "10 OBOL/request" { t.Errorf("price = %q, want 10 OBOL/request", svc.Price) diff --git a/internal/x402/agent_extras_test.go b/internal/x402/agent_extras_test.go index 66d35614..1af20366 100644 --- a/internal/x402/agent_extras_test.go +++ b/internal/x402/agent_extras_test.go @@ -24,7 +24,7 @@ func TestMergeAgentExtras_Noop_NonAgentRule(t *testing.T) { } } -func TestMergeAgentExtras_AddsAllAgentFields(t *testing.T) { +func TestMergeAgentExtras_AddsAgentFieldsButNotModel(t *testing.T) { req := x402types.PaymentRequirements{Extra: map[string]any{}} rule := &RouteRule{ AgentModel: "qwen3.5:9b", @@ -34,8 +34,8 @@ func TestMergeAgentExtras_AddsAllAgentFields(t *testing.T) { mergeAgentExtras(&req, rule) - if got := req.Extra["agentModel"]; got != "qwen3.5:9b" { - t.Errorf("agentModel = %v, want qwen3.5:9b", got) + if _, ok := req.Extra["agentModel"]; ok { + t.Error("agentModel must not be surfaced — the underlying model is an internal detail, not buyer-facing") } if got := req.Extra["agentRuntime"]; got != "hermes" { t.Errorf("agentRuntime = %v", got) @@ -55,14 +55,17 @@ func TestMergeAgentExtras_InitialisesNilExtra(t *testing.T) { // mergeAgentExtras must still cope with a nil map for callers that // build PaymentRequirements directly (e.g. tests). req := x402types.PaymentRequirements{} - rule := &RouteRule{AgentModel: "qwen3.5:9b"} + rule := &RouteRule{AgentRuntime: "hermes"} mergeAgentExtras(&req, rule) if req.Extra == nil { t.Fatal("Extra not initialised") } - if req.Extra["agentModel"] != "qwen3.5:9b" { - t.Errorf("agentModel missing: %+v", req.Extra) + if _, ok := req.Extra["agentModel"]; ok { + t.Error("agentModel must not be surfaced") + } + if req.Extra["agentRuntime"] != "hermes" { + t.Errorf("agentRuntime missing: %+v", req.Extra) } } diff --git a/internal/x402/bazaar.go b/internal/x402/bazaar.go index db3bf8ee..29382e2d 100644 --- a/internal/x402/bazaar.go +++ b/internal/x402/bazaar.go @@ -54,8 +54,15 @@ func WithBazaar(extensions map[string]any, offerType, model string) map[string]a // gets the generic operator-defined JSON shape. func BuildBazaarExtension(offerType, model string) map[string]any { switch normalizeOfferType(offerType) { - case "inference", "agent": + case "inference": + // The buyer selects the model (paid/), so the real id + // is buyer-facing and correct to advertise in the example. return bazaarChatCompletions(model) + case "agent": + // An agent runs its own model and ignores the request `model` field, + // so the model id is an internal detail, not buyer-facing. Seed the + // chat example with the neutral placeholder rather than the real id. + return bazaarChatCompletions("") default: return bazaarGenericJSON() } diff --git a/internal/x402/bazaar_test.go b/internal/x402/bazaar_test.go index e1269af0..0d53b9da 100644 --- a/internal/x402/bazaar_test.go +++ b/internal/x402/bazaar_test.go @@ -33,7 +33,7 @@ func TestBuildBazaarExtension(t *testing.T) { wantModel string }{ {"inference", "llama-3-70b", "llama-3-70b"}, - {"agent", "qwen3.5:9b", "qwen3.5:9b"}, + {"agent", "qwen3.5:9b", "your-model-id"}, // agent model is internal — placeholder, never the real id {"inference", "", "your-model-id"}, {"http", "", ""}, {"", "", ""}, // static config routes fall back to the generic shape diff --git a/internal/x402/chains.go b/internal/x402/chains.go index 6e652d9d..b097fb18 100644 --- a/internal/x402/chains.go +++ b/internal/x402/chains.go @@ -66,12 +66,16 @@ var ( } ChainBaseSepolia = ChainInfo{ - Name: "base-sepolia", - NetworkID: "base-sepolia", - CAIP2Network: "eip155:84532", - USDCAddress: "0x036CbD53842c5426634e7929541eC2318f3dCF7e", - Decimals: 6, - EIP3009Name: "USD Coin", + Name: "base-sepolia", + NetworkID: "base-sepolia", + CAIP2Network: "eip155:84532", + USDCAddress: "0x036CbD53842c5426634e7929541eC2318f3dCF7e", + Decimals: 6, + // Base-Sepolia USDC is FiatTokenV2_2 whose EIP-712 domain name is + // "USDC", NOT the mainnet "USD Coin". Advertising "USD Coin" makes a + // real facilitator reject otherwise-valid signatures — the recurring + // base-sepolia "name" bug that a stub facilitator silently masks. + EIP3009Name: "USDC", EIP3009Version: "2", } diff --git a/internal/x402/paymentrequired.go b/internal/x402/paymentrequired.go index d23c261d..ed920873 100644 --- a/internal/x402/paymentrequired.go +++ b/internal/x402/paymentrequired.go @@ -414,13 +414,13 @@ func inferenceCopy(url, siteURL string, d PaymentDisplay) typeCopy { "pre-authorizes the provider through your agent's wallet and registers the model as " + "paid/<model> in your local LiteLLM gateway, so every agent in your stack " + "can call it like any other OpenAI-compatible model."), - ShowPrimary: true, - PrimaryTitle: "Use this service for your Obol Agent's model", - PrimaryLede: "Run this from your obol-stack host. The CLI walks `/api/services.json`, prompts for auto-refill + a request count, and pre-signs the authorizations from your master agent's wallet. Pass `--yes --count ` for non-interactive runs.", - PrimaryIsCode: true, - PrimaryPayload: cmd, - PromptObol: prompt, - PromptOther: other, + ShowPrimary: true, + PrimaryTitle: "Use this service for your Obol Agent's model", + PrimaryLede: "Run this from your obol-stack host. The CLI walks `/api/services.json`, prompts for auto-refill + a request count, and pre-signs the authorizations from your master agent's wallet. Pass `--yes --count ` for non-interactive runs.", + PrimaryIsCode: true, + PrimaryPayload: cmd, + PromptObol: prompt, + PromptOther: other, ChatCompletionsNote: "Direct HTTP buyers use OpenAI-style chat-completions. A minimal paid request looks like:", ChatCompletionsBody: fmt.Sprintf(`POST %s/v1/chat/completions Content-Type: application/json @@ -441,40 +441,35 @@ X-PAYMENT: // Other-AI-Agent prompt cards drive the action, and a chat-completions // example sits next to the raw x402 JSON in the Pay-manually card to // make the wire shape obvious to readers walking the spec by hand. -func agentCopy(url, siteURL string, d PaymentDisplay) typeCopy { - model := sanitizeDisplayToken(d.Model, "") - modelClause := "" - modelLine := "" - if model != "" { - modelClause = fmt.Sprintf(`"model": "%s",`, model) - modelLine = " (running " + model + ")" - } - +func agentCopy(url, siteURL string, _ PaymentDisplay) typeCopy { + // Deliberately no model: an Obol Agent runs its own model, skills, and + // memory — the buyer never picks one. Surfacing the underlying model here + // is noise (and the chat-completions `model` field is ignored by the + // agent), so the example omits it entirely. body := fmt.Sprintf(`POST %s Content-Type: application/json X-PAYMENT: { - %s "messages": [ {"role": "user", "content": ""} ] -}`, url, modelClause) +}`, url) - modelFlag := sanitizeDisplayToken(d.Model, "") prompt := fmt.Sprintf( - "Use the buy-x402 skill's `pay-agent` command to buy one round of work from this Obol Agent%s. "+ - "This is an *agent*, not a raw model — it has its own skills, tools, and memory. Example:\n\n"+ - "pay-agent %s --model %s --message \"\"", - modelLine, url, modelFlag, + "Use the buy-x402 skill's `pay-agent` command to buy one round of work from this Obol Agent. "+ + "This is an *agent*, not a raw model — it has its own skills, tools, and memory (and picks "+ + "its own model), so you only send it a prompt. Example:\n\n"+ + "pay-agent %s --message \"\"", + url, ) other := fmt.Sprintf( - "Help me call the Obol Agent at %s%s — it's an autonomous agent (tools + skills + memory), "+ + "Help me call the Obol Agent at %s — it's an autonomous agent (tools + skills + memory), "+ "not a raw LLM. It's gated by %s. POST OpenAI-style chat-completions JSON with a real "+ "prompt in `messages`, attach a signed EIP-3009/Permit2 authorization as `X-PAYMENT`, "+ "and report what the agent does.", - url, modelLine, x402GuideRef(siteURL), + url, x402GuideRef(siteURL), ) return typeCopy{ diff --git a/internal/x402/paymentrequired_test.go b/internal/x402/paymentrequired_test.go index a9ef9563..9cdcf0be 100644 --- a/internal/x402/paymentrequired_test.go +++ b/internal/x402/paymentrequired_test.go @@ -252,9 +252,10 @@ func TestHTMLAware_AgentShowsChatCompletionsInPayManually(t *testing.T) { } mustContain(t, body, "Pay manually (raw HTTP 402)") mustContain(t, body, "Obol Agents accept OpenAI-style chat-completions bodies") - // Example chat-completions body (JSON snippet inside
; html/template
-	// escapes the quotes).
-	mustContain(t, body, `"model": "qwen3.5:9b"`)
+	// The agent runs its own model — the real id must never leak into the
+	// 402 page (neither the hand-written example nor the embedded bazaar
+	// JSON). The bazaar example carries a neutral placeholder instead.
+	mustNotContain(t, body, "qwen3.5:9b")
 	mustContain(t, body, `"messages":`)
 
 	// Lede uses the operator-facing copy and links to docs.obol.org.
@@ -387,6 +388,13 @@ func mustContain(t *testing.T, haystack, needle string) {
 	}
 }
 
+func mustNotContain(t *testing.T, haystack, needle string) {
+	t.Helper()
+	if strings.Contains(haystack, needle) {
+		t.Errorf("body unexpectedly contains %q", needle)
+	}
+}
+
 // sanitizeDisplayToken must pass real model ids / offer names through
 // untouched while collapsing anything carrying shell metacharacters to the
 // placeholder — the values land in copy-pasteable commands on the public
diff --git a/internal/x402/tokens.go b/internal/x402/tokens.go
index 93b343ae..fc437ad8 100644
--- a/internal/x402/tokens.go
+++ b/internal/x402/tokens.go
@@ -44,7 +44,7 @@ type TokenEntry struct {
 var tokenRegistry = map[string]map[string]TokenEntry{
 	"USDC": {
 		"base":             {Address: ChainBaseMainnet.USDCAddress, Symbol: "USDC", Decimals: 6, TransferMethod: "eip3009", EIP712Name: "USD Coin", EIP712Version: "2"},
-		"base-sepolia":     {Address: ChainBaseSepolia.USDCAddress, Symbol: "USDC", Decimals: 6, TransferMethod: "eip3009", EIP712Name: "USD Coin", EIP712Version: "2"},
+		"base-sepolia":     {Address: ChainBaseSepolia.USDCAddress, Symbol: "USDC", Decimals: 6, TransferMethod: "eip3009", EIP712Name: "USDC", EIP712Version: "2"},
 		"ethereum":         {Address: ChainEthereumMainnet.USDCAddress, Symbol: "USDC", Decimals: 6, TransferMethod: "eip3009", EIP712Name: "USD Coin", EIP712Version: "2"},
 		"polygon":          {Address: ChainPolygonMainnet.USDCAddress, Symbol: "USDC", Decimals: 6, TransferMethod: "eip3009", EIP712Name: "USD Coin", EIP712Version: "2"},
 		"polygon-amoy":     {Address: ChainPolygonAmoy.USDCAddress, Symbol: "USDC", Decimals: 6, TransferMethod: "eip3009", EIP712Name: "USD Coin", EIP712Version: "2"},
diff --git a/internal/x402/verifier.go b/internal/x402/verifier.go
index 264522af..ab42479f 100644
--- a/internal/x402/verifier.go
+++ b/internal/x402/verifier.go
@@ -444,19 +444,18 @@ func patternToPrefix(pattern string) string {
 	return strings.TrimSuffix(pattern, "*")
 }
 
-// mergeAgentExtras adds the agent fields from a RouteRule to the
-// requirement's Extra map so buyers probing a 402 see which model and
-// skills are powering the offer. No-op for non-agent rules.
+// mergeAgentExtras adds agent metadata from a RouteRule to the requirement's
+// Extra map so buyers probing a 402 can tell it's an agent. The underlying
+// model is intentionally NOT surfaced: an Obol Agent runs its own model and
+// the buyer never selects one, so the model id is an internal detail, not
+// buyer-facing info. No-op for non-agent rules.
 func mergeAgentExtras(req *x402types.PaymentRequirements, rule *RouteRule) {
-	if rule.AgentModel == "" && len(rule.AgentSkills) == 0 && rule.AgentRuntime == "" {
+	if len(rule.AgentSkills) == 0 && rule.AgentRuntime == "" {
 		return
 	}
 	if req.Extra == nil {
 		req.Extra = make(map[string]interface{})
 	}
-	if rule.AgentModel != "" {
-		req.Extra["agentModel"] = rule.AgentModel
-	}
 	if len(rule.AgentSkills) > 0 {
 		// Materialise as []any so JSON marshalling produces a proper array
 		// regardless of whether the source loaded it from yaml or