From 44e6950274c6cbbad539f8ff8b128656480dfd57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?=
Date: Wed, 29 Apr 2026 01:18:55 +0100
Subject: [PATCH] Fix up old references to qwen3 and a 0.6b model

---
 CLAUDE.md                                      |  2 +-
 cmd/obol/model.go                              | 10 ++++++----
 docs/getting-started.md                        |  4 ++--
 docs/guides/monetize-inference.md              | 16 ++++++++--------
 flows/flow-03-inference.sh                     |  4 ++--
 flows/lib.sh                                   |  2 +-
 internal/embed/skills/monetize-guide/SKILL.md  |  3 ++-
 internal/embed/skills/sell/SKILL.md            |  2 +-
 internal/openclaw/monetize_integration_test.go |  2 +-
 internal/openclaw/openclaw.go                  |  2 +-
 10 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index a728a9ab..d1afd992 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -224,7 +224,7 @@ obol stack up # cluster + base
 # while `qwen36-fast` (no `:Nb` tag) ranks 0, so the agent stays on the slow
 # host model. This is the easy footgun.
 obol model remove qwen3.5:9b
-obol model remove qwen3:0.6b
+obol model remove qwen3.5:4b
 
 obol model setup custom \
   --name spark1-vllm \
diff --git a/cmd/obol/model.go b/cmd/obol/model.go
index 726285fa..375885e9 100644
--- a/cmd/obol/model.go
+++ b/cmd/obol/model.go
@@ -148,7 +148,7 @@ func setupOllama(cfg *config.Config, u *ui.UI, models []string) error {
 	if len(ollamaModels) == 0 {
 		u.Warn("No models pulled in Ollama")
 		u.Print("")
-		u.Print(" Hint: Pull a model with: ollama pull qwen3:8b (or qwen3.6:27b on hosts with ≥32GB RAM)")
+		u.Print(" Hint: Pull a model with: ollama pull qwen3.5:9b (or qwen3.6:27b on hosts with ≥32GB RAM)")
 		u.Print(" Hint: Or run: obol model pull")
 
 		return errors.New("ollama is running but has no models")
@@ -576,8 +576,9 @@
 func promptModelPull(u *ui.UI) (string, error) {
 	suggestions := []string{
 		"qwen3.6:27b (17 GB) — High-quality general-purpose (recommended, needs ≥32GB RAM)",
-		"qwen3.6:27b-coding-mxfp8 (~13 GB) — Code generation (Qwen3.6, MXFP8 quant)",
-		"qwen3:8b (5.2 GB) — Fast general-purpose, laptop-friendly",
+		"qwen3.6:27b-coding-mxfp8 (31 GB) — Code generation (Qwen3.6, MXFP8 quant)",
+		"qwen3.5:9b (6.6 GB) — Validated baseline; fits on most laptops",
+		"qwen3.5:4b (3.4 GB) — Smallest current Qwen, low-RAM laptops",
 		"deepseek-r1:8b (4.9 GB) — Reasoning",
 		"gemma3:4b (3.3 GB) — Lightweight, multilingual",
 		"Other (enter name)",
@@ -585,7 +586,8 @@
 	modelNames := []string{
 		"qwen3.6:27b",
 		"qwen3.6:27b-coding-mxfp8",
-		"qwen3:8b",
+		"qwen3.5:9b",
+		"qwen3.5:4b",
 		"deepseek-r1:8b",
 		"gemma3:4b",
 	}
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 94dee367..8f4a3958 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -88,9 +88,9 @@ curl -s http://localhost:11434/api/tags | python3 -m json.tool
 If you don't have a model yet, pull one:
 
 ```bash
-ollama pull qwen3.5:35b # Large model with tool-call support
+ollama pull qwen3.5:9b # Validated baseline, ~6.6 GB
 # Or a smaller model for quick testing:
-ollama pull qwen3:0.6b
+ollama pull qwen3.5:4b # ~3.4 GB
 ```
 
 ### 3b. Verify LiteLLM can reach Ollama
diff --git a/docs/guides/monetize-inference.md b/docs/guides/monetize-inference.md
index 4ffd2e94..75ba5f10 100644
--- a/docs/guides/monetize-inference.md
+++ b/docs/guides/monetize-inference.md
@@ -98,7 +98,7 @@ Make sure the model is available in your host Ollama:
 ollama pull qwen3.5:9b
 
 # Or a smaller model for quick testing
-ollama pull qwen3:0.6b
+ollama pull qwen3.5:4b
 
 # Verify it's available
 curl -s http://localhost:11434/api/tags | python3 -m json.tool
@@ -250,7 +250,7 @@ curl -s -X POST "$TUNNEL_URL/rpc" \
 curl -s -w "\nHTTP %{http_code}" -X POST \
   "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 
 # ERC-8004 registration document (200)
 curl -s "$TUNNEL_URL/.well-known/agent-registration.json" | jq .
@@ -262,7 +262,7 @@ You can also verify locally (bypasses Cloudflare):
 curl -s -w "\nHTTP %{http_code}" -X POST \
   "http://obol.stack:8080/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 ```
 
 A **402 Payment Required** response confirms the x402 gate is working. The response body contains the payment requirements:
@@ -323,7 +323,7 @@ Send a request without payment:
 ```bash
 curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}' \
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}' \
   -D - 2>&1 | head -30
 ```
 
@@ -347,7 +347,7 @@ client = LLMClient(
 )
 
 # Automatically: 402 -> sign EIP-712 -> retry with payment header -> 200
-response = client.chat("qwen3:0.6b", "Explain Ethereum in one sentence.")
+response = client.chat("qwen3.5:9b", "Explain Ethereum in one sentence.")
 print(f"Response: {response}")
 print(f"Session cost: ${client._session_total_usd}")
 ```
@@ -369,7 +369,7 @@ The SDK handles the full x402 flow:
 # Step 1: Get payment requirements from the 402 response
 curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 
 # Step 2: Sign the EIP-712 payment (requires SDK or custom code)
 # The 402 body contains: payTo, amount, asset, network, extra.name, extra.version
@@ -380,7 +380,7 @@
 curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
   -H "X-PAYMENT: " \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 # -> 200 OK + inference response
 ```
 
@@ -411,7 +411,7 @@ export TUNNEL_URL=$(obol tunnel status | grep -oE 'https://[a-z0-9-]+\.trycloudf
 curl -s -w "\nHTTP %{http_code}" -X POST \
   "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
+  -d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
 
 # Paid request through tunnel (supported production path)
 # The buyer talks to LiteLLM, which routes paid models through the in-pod
diff --git a/flows/flow-03-inference.sh b/flows/flow-03-inference.sh
index 1bf83de9..9ed31c9b 100755
--- a/flows/flow-03-inference.sh
+++ b/flows/flow-03-inference.sh
@@ -41,8 +41,8 @@ for i in $(seq 1 15); do
   sleep 2
 done
 
-# Use qwen3.5:9b — it is configured in LiteLLM's model_list (FLOW_MODEL qwen3:0.6b
-# is only registered in Ollama directly; the x402 sell/buy flows use it via that path)
+# Use qwen3.5:9b — it is configured in LiteLLM's model_list (FLOW_MODEL is the
+# default in flows/lib.sh; the x402 sell/buy flows route through it directly)
 LITELLM_MODEL="qwen3.5:9b"
 out=$(curl -sf --max-time 120 -X POST http://localhost:8001/v1/chat/completions \
   -H "Content-Type: application/json" \
diff --git a/flows/lib.sh b/flows/lib.sh
index 77f35f27..a0648a23 100755
--- a/flows/lib.sh
+++ b/flows/lib.sh
@@ -328,7 +328,7 @@ route_llm_via_obol_cli() {
   local existing
   existing=$($runner model list 2>/dev/null || true)
   local entry
-  for entry in qwen3.5:9b qwen3:0.6b; do
+  for entry in qwen3.5:9b qwen3.5:4b; do
     if printf '%s' "$existing" | grep -Fq "$entry"; then
       $runner model remove "$entry" --no-sync >/dev/null 2>&1 || true
     fi
diff --git a/internal/embed/skills/monetize-guide/SKILL.md b/internal/embed/skills/monetize-guide/SKILL.md
index 1c5f7721..01cfca6e 100644
--- a/internal/embed/skills/monetize-guide/SKILL.md
+++ b/internal/embed/skills/monetize-guide/SKILL.md
@@ -62,7 +62,8 @@ for m in data.get('models', []):
 Report the available models to the user. If no models are found, suggest they pull one:
 
 ```bash
-ollama pull qwen3:8b # Laptop-friendly, ~5 GB
+ollama pull qwen3.5:4b # Smallest current Qwen, ~3.4 GB (low-RAM laptops)
+ollama pull qwen3.5:9b # Validated baseline, ~6.6 GB
 ollama pull qwen3.6:27b # High quality, ~17 GB (needs ≥32GB RAM)
 ```
 
diff --git a/internal/embed/skills/sell/SKILL.md b/internal/embed/skills/sell/SKILL.md
index 6fa09cb1..07e85b6d 100644
--- a/internal/embed/skills/sell/SKILL.md
+++ b/internal/embed/skills/sell/SKILL.md
@@ -30,7 +30,7 @@ python3 scripts/monetize.py list
 
 # Create a new offer to monetize a local Ollama model
 python3 scripts/monetize.py create my-inference \
-  --model qwen3:8b \
+  --model qwen3.5:9b \
   --runtime ollama \
   --upstream ollama \
   --namespace llm \
diff --git a/internal/openclaw/monetize_integration_test.go b/internal/openclaw/monetize_integration_test.go
index df030b94..dc27e9bf 100644
--- a/internal/openclaw/monetize_integration_test.go
+++ b/internal/openclaw/monetize_integration_test.go
@@ -2890,7 +2890,7 @@ func TestIntegration_Fork_RealFacilitatorPayment(t *testing.T) {
 //
 // Prerequisites:
 // - Running k3d cluster with CRD, agent, x402-verifier, CF quick tunnel
-// - Ollama with a cached model (any model — qwen2.5, qwen3:0.6b, etc.)
+// - Ollama with a cached model (any model — qwen3.5:4b, qwen3.5:9b, etc.)
 // - Anvil (Foundry) installed
 // - x402-rs source or binary (set X402_RS_DIR or X402_FACILITATOR_BIN)
 func TestIntegration_Tunnel_RealFacilitatorOllama(t *testing.T) {
diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go
index a3bceef2..eb8774e7 100644
--- a/internal/openclaw/openclaw.go
+++ b/internal/openclaw/openclaw.go
@@ -137,7 +137,7 @@ func SetupDefault(cfg *config.Config, u *ui.UI) error {
 		} else {
 			u.Successf("Local Ollama detected at %s (no models pulled)", ollamaEndpoint())
 			u.Print(" Run 'obol model setup' to configure a cloud provider,")
-			u.Print(" or pull a model with: ollama pull qwen3:8b (or qwen3.6:27b on hosts with ≥32GB RAM)")
+			u.Print(" or pull a model with: ollama pull qwen3.5:9b (or qwen3.6:27b on hosts with ≥32GB RAM)")
 		}
 	} else {
 		u.Warnf("Local Ollama not detected on host (%s)", ollamaEndpoint())