From 44e6950274c6cbbad539f8ff8b128656480dfd57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?=
Date: Wed, 29 Apr 2026 01:18:55 +0100
Subject: [PATCH] Fix up old references to qwen3 and a 0.6b model

---
 CLAUDE.md                                      |  2 +-
 cmd/obol/model.go                              | 10 ++++++----
 docs/getting-started.md                        |  4 ++--
 docs/guides/monetize-inference.md              | 16 ++++++++--------
 flows/flow-03-inference.sh                     |  4 ++--
 flows/lib.sh                                   |  2 +-
 internal/embed/skills/monetize-guide/SKILL.md  |  3 ++-
 internal/embed/skills/sell/SKILL.md            |  2 +-
 internal/openclaw/monetize_integration_test.go |  2 +-
 internal/openclaw/openclaw.go                  |  2 +-
 10 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index a728a9ab..d1afd992 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -224,7 +224,7 @@ obol stack up # cluster + base
 # while `qwen36-fast` (no `:Nb` tag) ranks 0, so the agent stays on the slow
 # host model. This is the easy footgun.
 obol model remove qwen3.5:9b
-obol model remove qwen3:0.6b
+obol model remove qwen3.5:4b
 
 obol model setup custom \
   --name spark1-vllm \
diff --git a/cmd/obol/model.go b/cmd/obol/model.go
index 726285fa..375885e9 100644
--- a/cmd/obol/model.go
+++ b/cmd/obol/model.go
@@ -148,7 +148,7 @@ func setupOllama(cfg *config.Config, u *ui.UI, models []string) error {
 	if len(ollamaModels) == 0 {
 		u.Warn("No models pulled in Ollama")
 		u.Print("")
-		u.Print(" Hint: Pull a model with: ollama pull qwen3:8b (or qwen3.6:27b on hosts with ≥32GB RAM)")
+		u.Print(" Hint: Pull a model with: ollama pull qwen3.5:9b (or qwen3.6:27b on hosts with ≥32GB RAM)")
 		u.Print(" Hint: Or run: obol model pull")
 
 		return errors.New("ollama is running but has no models")
@@ -576,8 +576,9 @@
 func promptModelPull(u *ui.UI) (string, error) {
 	suggestions := []string{
 		"qwen3.6:27b (17 GB) — High-quality general-purpose (recommended, needs ≥32GB RAM)",
-		"qwen3.6:27b-coding-mxfp8 (~13 GB) — Code generation (Qwen3.6, MXFP8 quant)",
-		"qwen3:8b (5.2 GB) — Fast general-purpose, laptop-friendly",
+		"qwen3.6:27b-coding-mxfp8 (31 GB) — Code generation (Qwen3.6, MXFP8 quant)",
+		"qwen3.5:9b (6.6 GB) — Validated baseline; fits on most laptops",
+		"qwen3.5:4b (3.4 GB) — Smallest current Qwen, low-RAM laptops",
 		"deepseek-r1:8b (4.9 GB) — Reasoning",
 		"gemma3:4b (3.3 GB) — Lightweight, multilingual",
 		"Other (enter name)",
@@ -585,7 +586,8 @@
 	modelNames := []string{
 		"qwen3.6:27b",
 		"qwen3.6:27b-coding-mxfp8",
-		"qwen3:8b",
+		"qwen3.5:9b",
+		"qwen3.5:4b",
 		"deepseek-r1:8b",
 		"gemma3:4b",
 	}
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 94dee367..8f4a3958 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -88,9 +88,9 @@ curl -s http://localhost:11434/api/tags | python3 -m json.tool
 If you don't have a model yet, pull one:
 
 ```bash
-ollama pull qwen3.5:35b # Large model with tool-call support
+ollama pull qwen3.5:9b # Validated baseline, ~6.6 GB
 # Or a smaller model for quick testing:
-ollama pull qwen3:0.6b
+ollama pull qwen3.5:4b # ~3.4 GB
 ```
 
 ### 3b. Verify LiteLLM can reach Ollama
diff --git a/docs/guides/monetize-inference.md b/docs/guides/monetize-inference.md
index 4ffd2e94..75ba5f10 100644
--- a/docs/guides/monetize-inference.md
+++ b/docs/guides/monetize-inference.md
@@ -98,7 +98,7 @@ Make sure the model is available in your host Ollama:
 ollama pull qwen3.5:9b
 
 # Or a smaller model for quick testing
-ollama pull qwen3:0.6b
+ollama pull qwen3.5:4b
 
 # Verify it's available
 curl -s http://localhost:11434/api/tags | python3 -m json.tool
@@ -250,7 +250,7 @@ curl -s -X POST "$TUNNEL_URL/rpc" \
 curl -s -w "\nHTTP %{http_code}" -X POST \
   "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 
 # ERC-8004 registration document (200)
 curl -s "$TUNNEL_URL/.well-known/agent-registration.json" | jq .
@@ -262,7 +262,7 @@ You can also verify locally (bypasses Cloudflare):
 curl -s -w "\nHTTP %{http_code}" -X POST \
   "http://obol.stack:8080/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 ```
 
 A **402 Payment Required** response confirms the x402 gate is working. The response body contains the payment requirements:
@@ -323,7 +323,7 @@ Send a request without payment:
 ```bash
 curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}' \
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}' \
   -D - 2>&1 | head -30
 ```
 
@@ -347,7 +347,7 @@ client = LLMClient(
 )
 
 # Automatically: 402 -> sign EIP-712 -> retry with payment header -> 200
-response = client.chat("qwen3:0.6b", "Explain Ethereum in one sentence.")
+response = client.chat("qwen3.5:9b", "Explain Ethereum in one sentence.")
 print(f"Response: {response}")
 print(f"Session cost: ${client._session_total_usd}")
 ```
@@ -369,7 +369,7 @@ The SDK handles the full x402 flow:
 # Step 1: Get payment requirements from the 402 response
 curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 
 # Step 2: Sign the EIP-712 payment (requires SDK or custom code)
 # The 402 body contains: payTo, amount, asset, network, extra.name, extra.version
@@ -380,7 +380,7 @@
 curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
   -H "X-PAYMENT: " \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 # -> 200 OK + inference response
 ```
 
@@ -411,7 +411,7 @@ export TUNNEL_URL=$(obol tunnel status | grep -oE 'https://[a-z0-9-]+\.trycloudf
 curl -s -w "\nHTTP %{http_code}" -X POST \
   "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 
 # Paid request through tunnel (supported production path)
 # The buyer talks to LiteLLM, which routes paid models through the in-pod
diff --git a/flows/flow-03-inference.sh b/flows/flow-03-inference.sh
index 1bf83de9..9ed31c9b 100755
--- a/flows/flow-03-inference.sh
+++ b/flows/flow-03-inference.sh
@@ -41,8 +41,8 @@ for i in $(seq 1 15); do
   sleep 2
 done
 
-# Use qwen3.5:9b — it is configured in LiteLLM's model_list (FLOW_MODEL qwen3:0.6b
-# is only registered in Ollama directly; the x402 sell/buy flows use it via that path)
+# Use qwen3.5:9b — it is configured in LiteLLM's model_list (FLOW_MODEL is the
+# default in flows/lib.sh; the x402 sell/buy flows route through it directly)
 LITELLM_MODEL="qwen3.5:9b"
 out=$(curl -sf --max-time 120 -X POST http://localhost:8001/v1/chat/completions \
   -H "Content-Type: application/json" \
diff --git a/flows/lib.sh b/flows/lib.sh
index 77f35f27..a0648a23 100755
--- a/flows/lib.sh
+++ b/flows/lib.sh
@@ -328,7 +328,7 @@ route_llm_via_obol_cli() {
   local existing
   existing=$($runner model list 2>/dev/null || true)
   local entry
-  for entry in qwen3.5:9b qwen3:0.6b; do
+  for entry in qwen3.5:9b qwen3.5:4b; do
     if printf '%s' "$existing" | grep -Fq "$entry"; then
       $runner model remove "$entry" --no-sync >/dev/null 2>&1 || true
     fi
diff --git a/internal/embed/skills/monetize-guide/SKILL.md b/internal/embed/skills/monetize-guide/SKILL.md
index 1c5f7721..01cfca6e 100644
--- a/internal/embed/skills/monetize-guide/SKILL.md
+++ b/internal/embed/skills/monetize-guide/SKILL.md
@@ -62,7 +62,8 @@ for m in data.get('models', []):
 Report the available models to the user. If no models are found, suggest they pull one:
 
 ```bash
-ollama pull qwen3:8b # Laptop-friendly, ~5 GB
+ollama pull qwen3.5:4b # Smallest current Qwen, ~3.4 GB (low-RAM laptops)
+ollama pull qwen3.5:9b # Validated baseline, ~6.6 GB
 ollama pull qwen3.6:27b # High quality, ~17 GB (needs ≥32GB RAM)
 ```
 
diff --git a/internal/embed/skills/sell/SKILL.md b/internal/embed/skills/sell/SKILL.md
index 6fa09cb1..07e85b6d 100644
--- a/internal/embed/skills/sell/SKILL.md
+++ b/internal/embed/skills/sell/SKILL.md
@@ -30,7 +30,7 @@ python3 scripts/monetize.py list
 
 # Create a new offer to monetize a local Ollama model
 python3 scripts/monetize.py create my-inference \
-  --model qwen3:8b \
+  --model qwen3.5:9b \
   --runtime ollama \
   --upstream ollama \
   --namespace llm \
diff --git a/internal/openclaw/monetize_integration_test.go b/internal/openclaw/monetize_integration_test.go
index df030b94..dc27e9bf 100644
--- a/internal/openclaw/monetize_integration_test.go
+++ b/internal/openclaw/monetize_integration_test.go
@@ -2890,7 +2890,7 @@ func TestIntegration_Fork_RealFacilitatorPayment(t *testing.T) {
 //
 // Prerequisites:
 // - Running k3d cluster with CRD, agent, x402-verifier, CF quick tunnel
-// - Ollama with a cached model (any model — qwen2.5, qwen3:0.6b, etc.)
+// - Ollama with a cached model (any model — qwen3.5:4b, qwen3.5:9b, etc.)
 // - Anvil (Foundry) installed
 // - x402-rs source or binary (set X402_RS_DIR or X402_FACILITATOR_BIN)
 func TestIntegration_Tunnel_RealFacilitatorOllama(t *testing.T) {
diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go
index a3bceef2..eb8774e7 100644
--- a/internal/openclaw/openclaw.go
+++ b/internal/openclaw/openclaw.go
@@ -137,7 +137,7 @@ func SetupDefault(cfg *config.Config, u *ui.UI) error {
 		} else {
 			u.Successf("Local Ollama detected at %s (no models pulled)", ollamaEndpoint())
 			u.Print(" Run 'obol model setup' to configure a cloud provider,")
-			u.Print(" or pull a model with: ollama pull qwen3:8b (or qwen3.6:27b on hosts with ≥32GB RAM)")
+			u.Print(" or pull a model with: ollama pull qwen3.5:9b (or qwen3.6:27b on hosts with ≥32GB RAM)")
 		}
 	} else {
 		u.Warnf("Local Ollama not detected on host (%s)", ollamaEndpoint())