From 526516aab8c5759bfac6d66ea00cbf3e5a84d06e Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Thu, 23 Apr 2026 15:03:32 +0000
Subject: [PATCH 1/4] Update stacklok/toolhive to v0.24.1

Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 .github/upstream-projects.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/upstream-projects.yaml b/.github/upstream-projects.yaml
index df8d66f0..c7f6e540 100644
--- a/.github/upstream-projects.yaml
+++ b/.github/upstream-projects.yaml
@@ -35,7 +35,7 @@ projects:
 
   - id: toolhive
     repo: stacklok/toolhive
-    version: v0.24.0
+    version: v0.24.1
     # toolhive is a monorepo covering the CLI, the Kubernetes
     # operator, and the vMCP gateway. It also introduces cross-
     # cutting features that land in concepts/, integrations/,

From 5e575351e400009725ee787cc09b0f21f27fdc88 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 23 Apr 2026 15:04:39 +0000
Subject: [PATCH 2/4] Refresh reference assets for toolhive v0.24.1

---
 static/api-specs/toolhive-api.yaml | 63 ++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/static/api-specs/toolhive-api.yaml b/static/api-specs/toolhive-api.yaml
index e6c07a3e..d601f942 100644
--- a/static/api-specs/toolhive-api.yaml
+++ b/static/api-specs/toolhive-api.yaml
@@ -372,8 +372,8 @@ components:
         subject_token_type:
           description: |-
             SubjectTokenType specifies the type of the subject token being exchanged.
-            Common values: tokenTypeAccessToken (default), tokenTypeIDToken, tokenTypeJWT.
-            If empty, defaults to tokenTypeAccessToken.
+            Common values: oauth.TokenTypeAccessToken (default), oauth.TokenTypeIDToken, oauth.TokenTypeJWT.
+            If empty, defaults to oauth.TokenTypeAccessToken.
           type: string
         token_url:
           description: TokenURL is the OAuth 2.0 token endpoint URL
@@ -1176,6 +1176,13 @@ components:
             K8sPodTemplatePatch is a JSON string to patch the Kubernetes pod template
             Only applicable when using Kubernetes runtime
           type: string
+        mcpserver_generation:
+          description: |-
+            MCPServerGeneration is the K8s .metadata.generation of the MCPServer CR that rendered
+            this RunConfig. The Kubernetes runtime uses it as a monotonic version to prevent stale
+            rolling-update pods from overwriting a newer RunConfig's StatefulSet apply. Zero value
+            means unversioned (backward-compat with older operators, or non-operator callers).
+          type: integer
         middleware_configs:
           description: |-
             MiddlewareConfigs contains the list of middleware to apply to the transport
@@ -4324,12 +4331,30 @@ paths:
               schema:
                 type: string
           description: Bad Request
+        "401":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Unauthorized (registry refused credentials)
+        "404":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Not Found (artifact not present in registry)
         "409":
           content:
             application/json:
               schema:
                 type: string
           description: Conflict
+        "429":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Too Many Requests (registry rate limit)
         "500":
           content:
             application/json:
@@ -4341,7 +4366,13 @@ paths:
             application/json:
               schema:
                 type: string
-          description: Bad Gateway
+          description: Bad Gateway (upstream registry failure)
+        "504":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Gateway Timeout (upstream pull timed out)
       summary: Install a skill
       tags:
       - skills
@@ -4560,6 +4591,24 @@ paths:
               schema:
                 type: string
           description: Bad Request
+        "401":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Unauthorized (registry refused credentials)
+        "404":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Not Found (artifact not present in registry)
+        "429":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Too Many Requests (registry rate limit)
         "500":
           content:
             application/json:
@@ -4571,7 +4620,13 @@ paths:
             application/json:
               schema:
                 type: string
-          description: Bad Gateway
+          description: Bad Gateway (upstream registry or git resolver failure)
+        "504":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Gateway Timeout (upstream pull timed out)
       summary: Get skill content
       tags:
       - skills

From c43a9e4a2eb78009f294323539ae656ac6accd63 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Thu, 23 Apr 2026 15:14:57 +0000
Subject: [PATCH 3/4] Document vMCP capacity limits for v0.24.1

Add a Capacity limits section to the vMCP scaling guide covering the
per-pod 1,000-session LRU cache, 30-minute inactivity TTL with Redis
sliding-window refresh, file-descriptor planning, Redis sizing and
default timeouts, and stateful-backend data loss on pod restart.
Derived from upstream PR stacklok/toolhive#5025 and verified against
source at the v0.24.1 tag.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../guides-vmcp/scaling-and-performance.mdx   | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/docs/toolhive/guides-vmcp/scaling-and-performance.mdx b/docs/toolhive/guides-vmcp/scaling-and-performance.mdx
index 6a20cf1c..60c9b1ee 100644
--- a/docs/toolhive/guides-vmcp/scaling-and-performance.mdx
+++ b/docs/toolhive/guides-vmcp/scaling-and-performance.mdx
@@ -138,6 +138,79 @@ a dedicated vMCP instance per team instead.
 
 :::
 
+## Capacity limits
+
+Review these limits before planning capacity for a vMCP deployment.
+
+### Per-pod session cache
+
+Each vMCP pod holds a node-local LRU cache capped at **1,000 concurrent
+sessions**. When the cache is full, the least-recently-used session is evicted
+and its backend connections are closed. Any request in flight at eviction time
+fails, and the next request for that session ID triggers a cache miss.
+
+When Redis session storage is configured, the session manager transparently
+rebuilds the session from stored metadata and reconnects to backends, so clients
+do not need to reinitialize. Without Redis, an evicted session is lost and the
+client must reinitialize.
+
+To serve more than 1,000 concurrent sessions per replica, add vMCP replicas and
+configure Redis session storage. Total capacity scales as `replicas × 1,000`.
+
+### Session TTL
+
+The vMCP server applies a **30-minute inactivity TTL** to session metadata. A
+session with no activity for 30 minutes expires and must be reinitialized by the
+client.
+
+With Redis session storage, the TTL is a sliding window: every request
+atomically refreshes the key's expiry. Active sessions remain valid indefinitely
+as long as they receive at least one request per TTL window. There is no
+absolute maximum session lifetime.
+
+### File descriptors
+
+Each open backend connection consumes one file descriptor on the vMCP pod. A pod
+aggregating many MCP backends at high session concurrency can exhaust the
+container's `nofile` limit before hitting the 1,000-session cache cap.
+
+Estimate the requirement as `concurrent_sessions × backends_per_session`, plus
+overhead for incoming client connections. The default Linux soft `nofile` limit
+is typically 1,024; raise it in the container spec or at the node level if you
+expect to serve hundreds of sessions aggregating multiple backends.
+
+### Redis sizing
+
+When you enable Redis session storage, size the Redis instance for the full
+fleet. Session payloads include routing tables and tool metadata - a rough
+estimate is 10-50 KB per session depending on backend count and tool count, with
+a fleet-wide maximum of `replicas × 1,000` concurrent sessions.
+
+Configure Redis with the `allkeys-lru` eviction policy so Redis sheds stale
+sessions under memory pressure rather than returning errors on new writes. Redis
+persistence is not required for session storage; if the Redis instance restarts,
+all sessions are lost and clients must reinitialize.
+
+Default timeouts (all tunable through `sessionStorage` configuration):
+
+| Setting       | Default   |
+| ------------- | --------- |
+| Dial timeout  | 5 seconds |
+| Read timeout  | 3 seconds |
+| Write timeout | 3 seconds |
+
+### Stateful backend data loss on pod restart
+
+vMCP is a stateless proxy: it holds routing tables and tool aggregation state,
+but backend MCP servers own their own state (browser sessions, database cursors,
+open files). When a vMCP pod restarts or is evicted, backend connections are
+torn down without a graceful MCP shutdown sequence.
+
+With Redis session storage, the routing table survives and clients can
+reconnect. However, any backend-side state is not recovered - the new connection
+starts fresh. In-flight tool calls are lost without a response. Implement retry
+logic with idempotency guards for tool invocations that modify external state.
+
 ## Next steps
 
 - [Explore Kubernetes operator guides](../guides-k8s/index.mdx) for managing MCP

From 57dd409549d747a793662b9bc19ccf591b1159c9 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Thu, 23 Apr 2026 15:20:08 +0000
Subject: [PATCH 4/4] Editorial fixes to vMCP capacity limits section

- Remove inaccurate claim that Redis client timeouts are tunable via
  sessionStorage CRD fields; the CRD does not expose them.
- Rephrase two mid-sentence spaced hyphens per the project style guide.
- Expand TTL acronym on first use.
- Flip one passive-voice sentence to active voice.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../guides-vmcp/scaling-and-performance.mdx    | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/docs/toolhive/guides-vmcp/scaling-and-performance.mdx b/docs/toolhive/guides-vmcp/scaling-and-performance.mdx
index 60c9b1ee..5a163ef7 100644
--- a/docs/toolhive/guides-vmcp/scaling-and-performance.mdx
+++ b/docs/toolhive/guides-vmcp/scaling-and-performance.mdx
@@ -157,11 +157,11 @@ client must reinitialize.
 To serve more than 1,000 concurrent sessions per replica, add vMCP replicas and
 configure Redis session storage. Total capacity scales as `replicas × 1,000`.
 
-### Session TTL
+### Session time-to-live (TTL)
 
 The vMCP server applies a **30-minute inactivity TTL** to session metadata. A
-session with no activity for 30 minutes expires and must be reinitialized by the
-client.
+session that receives no requests for 30 minutes expires, and the client must
+reinitialize it.
 
 With Redis session storage, the TTL is a sliding window: every request
 atomically refreshes the key's expiry. Active sessions remain valid indefinitely
@@ -182,7 +182,7 @@ expect to serve hundreds of sessions aggregating multiple backends.
 ### Redis sizing
 
 When you enable Redis session storage, size the Redis instance for the full
-fleet. Session payloads include routing tables and tool metadata - a rough
+fleet. Session payloads include routing tables and tool metadata. A rough
 estimate is 10-50 KB per session depending on backend count and tool count, with
 a fleet-wide maximum of `replicas × 1,000` concurrent sessions.
 
@@ -191,7 +191,8 @@ sessions under memory pressure rather than returning errors on new writes. Redis
 persistence is not required for session storage; if the Redis instance restarts,
 all sessions are lost and clients must reinitialize.
 
-Default timeouts (all tunable through `sessionStorage` configuration):
+The Redis client uses the following default timeouts. These values are
+hardcoded and are not currently exposed through the VirtualMCPServer CRD.
 
 | Setting       | Default   |
 | ------------- | --------- |
@@ -207,9 +208,10 @@ open files). When a vMCP pod restarts or is evicted, backend connections are
 torn down without a graceful MCP shutdown sequence.
 
 With Redis session storage, the routing table survives and clients can
-reconnect. However, any backend-side state is not recovered - the new connection
-starts fresh. In-flight tool calls are lost without a response. Implement retry
-logic with idempotency guards for tool invocations that modify external state.
+reconnect. However, the new connection does not recover any backend-side state;
+it starts fresh. In-flight tool calls are lost without a response. Implement
+retry logic with idempotency guards for tool invocations that modify external
+state.
 
 ## Next steps