2 changes: 1 addition & 1 deletion CLAUDE.md
@@ -224,7 +224,7 @@ obol stack up # cluster + base
# while `qwen36-fast` (no `:Nb` tag) ranks 0, so the agent stays on the slow
# host model. This is the easy footgun.
obol model remove qwen3.5:9b
- obol model remove qwen3:0.6b
+ obol model remove qwen3.5:4b

obol model setup custom \
--name spark1-vllm \
10 changes: 6 additions & 4 deletions cmd/obol/model.go
@@ -148,7 +148,7 @@ func setupOllama(cfg *config.Config, u *ui.UI, models []string) error {
if len(ollamaModels) == 0 {
u.Warn("No models pulled in Ollama")
u.Print("")
u.Print(" Hint: Pull a model with: ollama pull qwen3:8b (or qwen3.6:27b on hosts with ≥32GB RAM)")
u.Print(" Hint: Pull a model with: ollama pull qwen3.5:9b (or qwen3.6:27b on hosts with ≥32GB RAM)")
u.Print(" Hint: Or run: obol model pull")

return errors.New("ollama is running but has no models")
@@ -576,16 +576,18 @@ func promptModelPull(u *ui.UI) (string, error) {

suggestions := []string{
"qwen3.6:27b (17 GB) — High-quality general-purpose (recommended, needs ≥32GB RAM)",
"qwen3.6:27b-coding-mxfp8 (~13 GB) — Code generation (Qwen3.6, MXFP8 quant)",
"qwen3:8b (5.2 GB) — Fast general-purpose, laptop-friendly",
"qwen3.6:27b-coding-mxfp8 (31 GB) — Code generation (Qwen3.6, MXFP8 quant)",
"qwen3.5:9b (6.6 GB) — Validated baseline; fits on most laptops",
"qwen3.5:4b (3.4 GB) — Smallest current Qwen, low-RAM laptops",
"deepseek-r1:8b (4.9 GB) — Reasoning",
"gemma3:4b (3.3 GB) — Lightweight, multilingual",
"Other (enter name)",
}
modelNames := []string{
"qwen3.6:27b",
"qwen3.6:27b-coding-mxfp8",
"qwen3:8b",
"qwen3.5:9b",
"qwen3.5:4b",
"deepseek-r1:8b",
"gemma3:4b",
}
4 changes: 2 additions & 2 deletions docs/getting-started.md
@@ -88,9 +88,9 @@ curl -s http://localhost:11434/api/tags | python3 -m json.tool
If you don't have a model yet, pull one:

```bash
- ollama pull qwen3.5:35b # Large model with tool-call support
+ ollama pull qwen3.5:9b # Validated baseline, ~6.6 GB
# Or a smaller model for quick testing:
- ollama pull qwen3:0.6b
+ ollama pull qwen3.5:4b # ~3.4 GB
```

### 3b. Verify LiteLLM can reach Ollama
16 changes: 8 additions & 8 deletions docs/guides/monetize-inference.md
@@ -98,7 +98,7 @@ Make sure the model is available in your host Ollama:
ollama pull qwen3.5:9b

# Or a smaller model for quick testing
- ollama pull qwen3:0.6b
+ ollama pull qwen3.5:4b

# Verify it's available
curl -s http://localhost:11434/api/tags | python3 -m json.tool
@@ -250,7 +250,7 @@ curl -s -X POST "$TUNNEL_URL/rpc" \
curl -s -w "\nHTTP %{http_code}" -X POST \
"$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
-d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'

# ERC-8004 registration document (200)
curl -s "$TUNNEL_URL/.well-known/agent-registration.json" | jq .
@@ -262,7 +262,7 @@ You can also verify locally (bypasses Cloudflare):
curl -s -w "\nHTTP %{http_code}" -X POST \
"http://obol.stack:8080/services/my-qwen/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
-d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
```

A **402 Payment Required** response confirms the x402 gate is working. The response body contains the payment requirements:
@@ -323,7 +323,7 @@ Send a request without payment:
```bash
curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}' \
-d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}' \
-D - 2>&1 | head -30
```

@@ -347,7 +347,7 @@ client = LLMClient(
)

# Automatically: 402 -> sign EIP-712 -> retry with payment header -> 200
- response = client.chat("qwen3:0.6b", "Explain Ethereum in one sentence.")
+ response = client.chat("qwen3.5:9b", "Explain Ethereum in one sentence.")
print(f"Response: {response}")
print(f"Session cost: ${client._session_total_usd}")
```
@@ -369,7 +369,7 @@ The SDK handles the full x402 flow:
# Step 1: Get payment requirements from the 402 response
curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
-d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'

# Step 2: Sign the EIP-712 payment (requires SDK or custom code)
# The 402 body contains: payTo, amount, asset, network, extra.name, extra.version
@@ -380,7 +380,7 @@
curl -s -X POST "$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
-H "Content-Type: application/json" \
-H "X-PAYMENT: <base64-encoded-x402-envelope>" \
-d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
-d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'
# -> 200 OK + inference response
```
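
Step 2 is the only part that needs custom code without the SDK. Below is a minimal, hypothetical Python sketch of the full sign-and-retry loop (not part of this diff). It assumes the x402 "exact" scheme signs an EIP-3009 `TransferWithAuthorization` message, that the 402 body exposes an `accepts` array with the fields named above, and a Base Sepolia chain ID; the precise envelope layout is an assumption, so verify it against the SDK before relying on it.

```python
# Hypothetical sketch — 402-body shape and envelope layout are assumptions
# based on the public x402 "exact" scheme, not taken from this repo's SDK.
import base64
import json
import secrets
import time

import requests
from eth_account import Account
from eth_account.messages import encode_typed_data

acct = Account.from_key("0x<buyer-test-key>")  # hypothetical funded test key
url = "https://<your-tunnel>.trycloudflare.com/services/my-qwen/v1/chat/completions"
body = {"model": "qwen3.5:9b", "messages": [{"role": "user", "content": "Hello"}]}

# Step 1: unpaid request returns 402 with payment requirements
req = requests.post(url, json=body).json()["accepts"][0]

# Step 2: sign an EIP-712 TransferWithAuthorization (EIP-3009) message
authorization = {
    "from": acct.address,
    "to": req["payTo"],
    "value": int(req["maxAmountRequired"]),
    "validAfter": 0,
    "validBefore": int(time.time()) + 600,  # payment window: 10 minutes
    "nonce": "0x" + secrets.token_hex(32),  # random 32-byte nonce
}
typed_data = {
    "types": {
        "EIP712Domain": [
            {"name": "name", "type": "string"},
            {"name": "version", "type": "string"},
            {"name": "chainId", "type": "uint256"},
            {"name": "verifyingContract", "type": "address"},
        ],
        "TransferWithAuthorization": [
            {"name": "from", "type": "address"},
            {"name": "to", "type": "address"},
            {"name": "value", "type": "uint256"},
            {"name": "validAfter", "type": "uint256"},
            {"name": "validBefore", "type": "uint256"},
            {"name": "nonce", "type": "bytes32"},
        ],
    },
    "primaryType": "TransferWithAuthorization",
    "domain": {
        "name": req["extra"]["name"],        # from the 402 body
        "version": req["extra"]["version"],  # from the 402 body
        "chainId": 84532,                    # assumes base-sepolia
        "verifyingContract": req["asset"],   # the ERC-20 token contract
    },
    "message": authorization,
}
signed = acct.sign_message(encode_typed_data(full_message=typed_data))
signature = "0x" + signed.signature.hex().removeprefix("0x")

# Step 3: retry with the signed payment in the X-PAYMENT header
# (some facilitators expect the integer fields as decimal strings)
envelope = {
    "x402Version": 1,
    "scheme": "exact",
    "network": req["network"],
    "payload": {"signature": signature, "authorization": authorization},
}
headers = {"X-PAYMENT": base64.b64encode(json.dumps(envelope).encode()).decode()}
resp = requests.post(url, json=body, headers=headers)
print(resp.status_code, resp.json())  # expect 200 + inference response
```

Note the trust model this buys: the signature authorizes moving at most `maxAmountRequired` of `asset` to `payTo`, and it expires at `validBefore`, so the buyer never hands over an open-ended spending key.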

@@ -411,7 +411,7 @@ export TUNNEL_URL=$(obol tunnel status | grep -oE 'https://[a-z0-9-]+\.trycloudf
curl -s -w "\nHTTP %{http_code}" -X POST \
"$TUNNEL_URL/services/my-qwen/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}]}'
-d '{"model":"qwen3.5:9b","messages":[{"role":"user","content":"Hello"}]}'

# Paid request through tunnel (supported production path)
# The buyer talks to LiteLLM, which routes paid models through the in-pod
4 changes: 2 additions & 2 deletions flows/flow-03-inference.sh
@@ -41,8 +41,8 @@ for i in $(seq 1 15); do
sleep 2
done

- # Use qwen3.5:9b — it is configured in LiteLLM's model_list (FLOW_MODEL qwen3:0.6b
- # is only registered in Ollama directly; the x402 sell/buy flows use it via that path)
+ # Use qwen3.5:9b — it is configured in LiteLLM's model_list (FLOW_MODEL is the
+ # default in flows/lib.sh; the x402 sell/buy flows route through it directly)
LITELLM_MODEL="qwen3.5:9b"
out=$(curl -sf --max-time 120 -X POST http://localhost:8001/v1/chat/completions \
-H "Content-Type: application/json" \
2 changes: 1 addition & 1 deletion flows/lib.sh
@@ -328,7 +328,7 @@ route_llm_via_obol_cli() {
local existing
existing=$($runner model list 2>/dev/null || true)
local entry
- for entry in qwen3.5:9b qwen3:0.6b; do
+ for entry in qwen3.5:9b qwen3.5:4b; do
if printf '%s' "$existing" | grep -Fq "$entry"; then
$runner model remove "$entry" --no-sync >/dev/null 2>&1 || true
fi
3 changes: 2 additions & 1 deletion internal/embed/skills/monetize-guide/SKILL.md
@@ -62,7 +62,8 @@ for m in data.get('models', []):

Report the available models to the user. If no models are found, suggest they pull one:
```bash
- ollama pull qwen3:8b # Laptop-friendly, ~5 GB
+ ollama pull qwen3.5:4b # Smallest current Qwen, ~3.4 GB (low-RAM laptops)
+ ollama pull qwen3.5:9b # Validated baseline, ~6.6 GB
ollama pull qwen3.6:27b # High quality, ~17 GB (needs ≥32GB RAM)
```

2 changes: 1 addition & 1 deletion internal/embed/skills/sell/SKILL.md
@@ -30,7 +30,7 @@ python3 scripts/monetize.py list

# Create a new offer to monetize a local Ollama model
python3 scripts/monetize.py create my-inference \
- --model qwen3:8b \
+ --model qwen3.5:9b \
--runtime ollama \
--upstream ollama \
--namespace llm \
2 changes: 1 addition & 1 deletion internal/openclaw/monetize_integration_test.go
@@ -2890,7 +2890,7 @@ func TestIntegration_Fork_RealFacilitatorPayment(t *testing.T) {
//
// Prerequisites:
// - Running k3d cluster with CRD, agent, x402-verifier, CF quick tunnel
- // - Ollama with a cached model (any model — qwen2.5, qwen3:0.6b, etc.)
+ // - Ollama with a cached model (any model — qwen3.5:4b, qwen3.5:9b, etc.)
// - Anvil (Foundry) installed
// - x402-rs source or binary (set X402_RS_DIR or X402_FACILITATOR_BIN)
func TestIntegration_Tunnel_RealFacilitatorOllama(t *testing.T) {
2 changes: 1 addition & 1 deletion internal/openclaw/openclaw.go
@@ -137,7 +137,7 @@ func SetupDefault(cfg *config.Config, u *ui.UI) error {
} else {
u.Successf("Local Ollama detected at %s (no models pulled)", ollamaEndpoint())
u.Print(" Run 'obol model setup' to configure a cloud provider,")
u.Print(" or pull a model with: ollama pull qwen3:8b (or qwen3.6:27b on hosts with ≥32GB RAM)")
u.Print(" or pull a model with: ollama pull qwen3.5:9b (or qwen3.6:27b on hosts with ≥32GB RAM)")
}
} else {
u.Warnf("Local Ollama not detected on host (%s)", ollamaEndpoint())