Port ProviderConfig model/token overrides and client startup cleanup fix

Copilot · edburns · web-flow · commit 46e5c3248f72 · 2026-05-05T23:18:06.000Z
- Add modelId, wireModel, maxInputTokens, maxOutputTokens to ProviderConfig
- Fix client startup cleanup race: properly destroy CLI process on failure
- Add unit tests for new ProviderConfig field serialization
- Add E2E tests for provider wire model forwarding
- Simplify BYOK identity limitations documentation (per reference impl)

Co-authored-by: edburns <75821+edburns@users.noreply.github.com>
diff --git a/src/main/java/com/github/copilot/sdk/CopilotClient.java b/src/main/java/com/github/copilot/sdk/CopilotClient.java
@@ -187,9 +187,9 @@ private CompletableFuture<Connection> startCore() {
     }
 
     private Connection startCoreBody() {
+        Process process = null;
         try {
             JsonRpcClient rpc;
-            Process process = null;
 
             if (optionsHost != null && optionsPort != null) {
                 // External server (TCP)
@@ -215,6 +215,11 @@ private Connection startCoreBody() {
             LOG.info("Copilot client connected");
             return connection;
         } catch (Exception e) {
+            // Clean up process if startup failed partway through
+            if (process != null) {
+                cleanupCliProcess(process);
+            }
+
             String stderr = serverManager.getStderrOutput();
             if (!stderr.isEmpty()) {
                 throw new CompletionException(new IOException(
@@ -224,6 +229,20 @@ private Connection startCoreBody() {
         }
     }
 
+    private static void cleanupCliProcess(Process process) {
+        try {
+            if (process.isAlive()) {
+                // Best effort: hard-kill, then wait up to FORCE_KILL_TIMEOUT_SECONDS for it to exit.
+                process.destroyForcibly().waitFor(FORCE_KILL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
+            }
+        } catch (InterruptedException interrupted) {
+            Thread.currentThread().interrupt();
+            LOG.log(Level.FINE, "Interrupted while cleaning up CLI process", interrupted);
+        } catch (Exception failure) {
+            LOG.log(Level.FINE, "Error cleaning up CLI process during failed startup", failure);
+        }
+    }
+
     private static final int MIN_PROTOCOL_VERSION = 2;
     private static final int METHOD_NOT_FOUND_ERROR_CODE = -32601;
 
diff --git a/src/main/java/com/github/copilot/sdk/json/ProviderConfig.java b/src/main/java/com/github/copilot/sdk/json/ProviderConfig.java
@@ -57,6 +57,18 @@ public class ProviderConfig {
     @JsonProperty("headers")
     private Map<String, String> headers;
 
+    @JsonProperty("modelId")
+    private String modelId;
+
+    @JsonProperty("wireModel")
+    private String wireModel;
+
+    @JsonProperty("maxPromptTokens")
+    private Integer maxInputTokens;
+
+    @JsonProperty("maxOutputTokens")
+    private Integer maxOutputTokens;
+
     /**
      * Gets the provider type.
      *
@@ -225,4 +237,116 @@ public ProviderConfig setHeaders(Map<String, String> headers) {
         this.headers = headers;
         return this;
     }
+
+    /**
+     * Gets the well-known model name used by the runtime to look up agent
+     * configuration (tools, prompts, reasoning behavior) and default token limits.
+     * <p>
+     * Also used as the wire model when {@link #getWireModel()} is not set. When
+     * this value is itself unset, {@link SessionConfig#getModel()} is used instead.
+     *
+     * @return the model ID, or {@code null} if not set
+     */
+    public String getModelId() {
+        return modelId;
+    }
+
+    /**
+     * Sets the well-known model name used by the runtime to look up agent
+     * configuration (tools, prompts, reasoning behavior) and default token limits.
+     * <p>
+     * Also used as the wire model when {@link #setWireModel(String)} is not set.
+     * When left unset, {@link SessionConfig#getModel()} is used instead.
+     *
+     * @param modelId
+     *            the model ID, or {@code null} to leave it unset
+     * @return this config for method chaining
+     */
+    public ProviderConfig setModelId(String modelId) {
+        this.modelId = modelId;
+        return this;
+    }
+
+    /**
+     * Gets the model name sent to the provider API for inference.
+     * <p>
+     * Use this when the provider's model name (e.g. an Azure deployment name or a
+     * custom fine-tune name) differs from {@link #getModelId()}. When unset,
+     * {@link #getModelId()} is used, then {@link SessionConfig#getModel()}.
+     *
+     * @return the wire model name, or {@code null} if not set
+     */
+    public String getWireModel() {
+        return wireModel;
+    }
+
+    /**
+     * Sets the model name sent to the provider API for inference.
+     * <p>
+     * Use this when the provider's model name (e.g. an Azure deployment name or a
+     * custom fine-tune name) differs from {@link #getModelId()}. When left unset,
+     * {@link #getModelId()} is used, then {@link SessionConfig#getModel()}.
+     *
+     * @param wireModel
+     *            the wire model name, or {@code null} to leave it unset
+     * @return this config for method chaining
+     */
+    public ProviderConfig setWireModel(String wireModel) {
+        this.wireModel = wireModel;
+        return this;
+    }
+
+    /**
+     * Gets the override for the resolved model's default max prompt tokens.
+     * <p>
+     * Serialized as the {@code maxPromptTokens} JSON property. The runtime triggers
+     * conversation compaction before sending a request when the prompt (system
+     * message, history, tool definitions, user message) would exceed this limit.
+     *
+     * @return the max input tokens, or {@code null} if not set
+     */
+    public Integer getMaxInputTokens() {
+        return maxInputTokens;
+    }
+
+    /**
+     * Sets the override for the resolved model's default max prompt tokens.
+     * <p>
+     * Serialized as the {@code maxPromptTokens} JSON property. The runtime triggers
+     * conversation compaction before sending a request when the prompt (system
+     * message, history, tool definitions, user message) would exceed this limit.
+     *
+     * @param maxInputTokens
+     *            the max input tokens, or {@code null} for no override
+     * @return this config for method chaining
+     */
+    public ProviderConfig setMaxInputTokens(Integer maxInputTokens) {
+        this.maxInputTokens = maxInputTokens;
+        return this;
+    }
+
+    /**
+     * Gets the override for the resolved model's default max output tokens.
+     * <p>
+     * Once reached, the model stops generating and returns a truncated response.
+     *
+     * @return the max output tokens, or {@code null} if not set
+     */
+    public Integer getMaxOutputTokens() {
+        return maxOutputTokens;
+    }
+
+    /**
+     * Sets the override for the resolved model's default max output tokens.
+     * <p>
+     * Once reached, the model stops generating and returns a truncated response.
+     *
+     * @param maxOutputTokens
+     *            the max output tokens, or {@code null} for no override
+     * @return this config for method chaining
+     */
+    public ProviderConfig setMaxOutputTokens(Integer maxOutputTokens) {
+        this.maxOutputTokens = maxOutputTokens;
+        return this;
+    }
 }
diff --git a/src/site/markdown/advanced.md b/src/site/markdown/advanced.md
@@ -421,17 +421,36 @@ foundry service status
 
 When using BYOK, be aware of these limitations:
 
-#### Identity Limitations
+#### Model and Token Limit Overrides
 
-BYOK authentication uses **static credentials only**. The following identity providers are NOT supported:
+You can override the model name and token limits used by the provider:
 
-- ❌ **Microsoft Entra ID (Azure AD)** - No support for Entra managed identities or service principals
-- ❌ **Third-party identity providers** - No OIDC, SAML, or other federated identity
-- ❌ **Managed identities** - Azure Managed Identity is not supported
+```java
+var session = client.createSession(
+    new SessionConfig().setOnPermissionRequest(PermissionHandler.APPROVE_ALL)
+        .setProvider(new ProviderConfig()
+            .setType("openai")
+            .setBaseUrl("https://api.openai.com/v1")
+            .setApiKey("sk-...")
+            .setModelId("gpt-4o")              // Runtime model for config lookup
+            .setWireModel("my-finetune-v3")    // Actual model name sent to provider API
+            .setMaxInputTokens(100_000)        // Override max prompt tokens
+            .setMaxOutputTokens(4096))         // Override max output tokens
+).get();
+```
 
-You must use an API key or static bearer token that you manage yourself.
+| Property | Description |
+|---|---|
+| `modelId` | Well-known model name for runtime config lookup (tools, prompts, reasoning). Also used as wire model when `wireModel` is not set. Falls back to `SessionConfig.model`. |
+| `wireModel` | Model name sent to the provider API. Use when the provider's model name (e.g. Azure deployment name or fine-tune) differs from `modelId`. Falls back to `modelId`, then `SessionConfig.model`. |
+| `maxInputTokens` | Override max prompt tokens (serialized as `maxPromptTokens`). The runtime compacts the conversation before the prompt exceeds this limit. |
+| `maxOutputTokens` | Override max output tokens. The model stops generating when this limit is hit. |
 
-**Why not Entra ID?** While Entra ID does issue bearer tokens, these tokens are short-lived (typically 1 hour) and require automatic refresh via the Azure Identity SDK. The `bearerToken` option only accepts a static string—there is no callback mechanism for the SDK to request fresh tokens. For long-running workloads requiring Entra authentication, you would need to implement your own token refresh logic and create new sessions with updated tokens.
+#### Identity Limitations
+
+BYOK authentication uses **static credentials only**.
+
+You must use an API key or static bearer token that you manage yourself.
 
 ---
 
diff --git a/src/test/java/com/github/copilot/sdk/ProviderConfigTest.java b/src/test/java/com/github/copilot/sdk/ProviderConfigTest.java
@@ -46,6 +46,10 @@ void testDefaultsAreNull() {
         assertNull(provider.getApiKey());
         assertNull(provider.getBearerToken());
         assertNull(provider.getAzure());
+        assertNull(provider.getModelId());
+        assertNull(provider.getWireModel());
+        assertNull(provider.getMaxInputTokens());
+        assertNull(provider.getMaxOutputTokens());
     }
 
     @Test
@@ -232,7 +236,8 @@ void testSerializeCustomWireApi() throws Exception {
     void testSerializeAllFields() throws Exception {
         var provider = new ProviderConfig().setType("azure-openai").setWireApi("completions")
                 .setBaseUrl("https://my-resource.openai.azure.com").setApiKey("my-api-key")
-                .setBearerToken("my-bearer-token").setAzure(new AzureOptions().setApiVersion("2024-02-01"));
+                .setBearerToken("my-bearer-token").setAzure(new AzureOptions().setApiVersion("2024-02-01"))
+                .setModelId("gpt-4o").setWireModel("my-deployment").setMaxInputTokens(50_000).setMaxOutputTokens(2048);
 
         JsonNode json = MAPPER.valueToTree(provider);
 
@@ -242,7 +247,11 @@ void testSerializeAllFields() throws Exception {
         assertEquals("my-api-key", json.get("apiKey").asText());
         assertEquals("my-bearer-token", json.get("bearerToken").asText());
         assertEquals("2024-02-01", json.get("azure").get("apiVersion").asText());
-        assertEquals(6, json.size(), "Expected exactly 6 JSON fields");
+        assertEquals("gpt-4o", json.get("modelId").asText());
+        assertEquals("my-deployment", json.get("wireModel").asText());
+        assertEquals(50_000, json.get("maxPromptTokens").asInt());
+        assertEquals(2048, json.get("maxOutputTokens").asInt());
+        assertEquals(10, json.size(), "Expected exactly 10 JSON fields");
     }
 
     @Test
@@ -285,6 +294,30 @@ void testRoundTripProviderConfig() throws Exception {
         assertEquals(original.getAzure().getApiVersion(), deserialized.getAzure().getApiVersion());
     }
 
+    @Test
+    void testSerializeProviderModelAndTokenOverrides() throws Exception {
+        var config = new ProviderConfig().setType("openai").setBaseUrl("https://example.com/provider")
+                .setHeaders(java.util.Map.of("Authorization", "Bearer provider-token")).setModelId("gpt-4o")
+                .setWireModel("my-finetune-v3").setMaxInputTokens(100_000).setMaxOutputTokens(4096);
+
+        // Wire form: maxInputTokens is emitted under the "maxPromptTokens" key.
+        JsonNode tree = MAPPER.valueToTree(config);
+        assertEquals("https://example.com/provider", tree.get("baseUrl").asText());
+        assertEquals("Bearer provider-token", tree.get("headers").get("Authorization").asText());
+        assertEquals("gpt-4o", tree.get("modelId").asText());
+        assertEquals("my-finetune-v3", tree.get("wireModel").asText());
+        assertEquals(100_000, tree.get("maxPromptTokens").asInt());
+        assertEquals(4096, tree.get("maxOutputTokens").asInt());
+
+        ProviderConfig roundTripped = MAPPER.treeToValue(tree, ProviderConfig.class);
+        assertNotNull(roundTripped);
+        assertEquals("https://example.com/provider", roundTripped.getBaseUrl());
+        assertEquals("gpt-4o", roundTripped.getModelId());
+        assertEquals("my-finetune-v3", roundTripped.getWireModel());
+        assertEquals(100_000, roundTripped.getMaxInputTokens());
+        assertEquals(4096, roundTripped.getMaxOutputTokens());
+    }
+
     @Test
     void testForwardCompatibilityIgnoresUnknownFields() throws Exception {
         String json = """
diff --git a/src/test/java/com/github/copilot/sdk/SessionConfigE2ETest.java b/src/test/java/com/github/copilot/sdk/SessionConfigE2ETest.java
@@ -127,4 +127,54 @@ private static String getSystemMessage(Map<String, Object> exchange) {
         }
         return null;
     }
+
+    // Reads request.model from a recorded exchange; null when the request is not a map or has no model.
+    private static String getRequestModel(Map<String, Object> exchange) {
+        Object requestObj = exchange.get("request");
+        if (!(requestObj instanceof Map<?, ?> request)) {
+            return null;
+        }
+        Object model = request.get("model");
+        return model != null ? model.toString() : null;
+    }
+
+    @Test
+    void testShouldForwardProviderWireModel() throws Exception {
+        ctx.configureForTest("session_config", "should_forward_provider_wire_model");
+
+        try (CopilotClient client = ctx.createClient()) {
+            var provider = new com.github.copilot.sdk.json.ProviderConfig().setType("openai")
+                    .setBaseUrl(ctx.getProxyUrl()).setApiKey("test-provider-key")
+                    .setWireModel("test-wire-model").setMaxOutputTokens(1024);
+            CopilotSession session = client.createSession(new SessionConfig().setModel("claude-sonnet-4.5")
+                    .setProvider(provider).setOnPermissionRequest(PermissionHandler.APPROVE_ALL)).get();
+
+            session.sendAndWait(new MessageOptions().setPrompt("What is 1+1?")).get(60, TimeUnit.SECONDS);
+            // Expect the explicit wireModel, rather than the session model, in the recorded request.
+            List<Map<String, Object>> recorded = ctx.getExchanges();
+            assertFalse(recorded.isEmpty(), "Should have at least one exchange");
+            assertEquals("test-wire-model", getRequestModel(recorded.get(0)));
+        }
+    }
+
+    @Test
+    void testShouldUseProviderModelIdAsWireModel() throws Exception {
+        ctx.configureForTest("session_config", "should_use_provider_model_id_as_wire_model");
+
+        try (CopilotClient client = ctx.createClient()) {
+            // No wireModel is set here, so the provider's modelId is expected in the recorded request.
+            var provider = new com.github.copilot.sdk.json.ProviderConfig().setType("openai")
+                    .setBaseUrl(ctx.getProxyUrl()).setApiKey("test-provider-key")
+                    .setModelId("claude-sonnet-4.5");
+            var config = new SessionConfig().setProvider(provider)
+                    .setOnPermissionRequest(PermissionHandler.APPROVE_ALL);
+            CopilotSession session = client.createSession(config).get();
+
+            session.sendAndWait(new MessageOptions().setPrompt("What is 1+1?")).get(60, TimeUnit.SECONDS);
+
+            List<Map<String, Object>> recorded = ctx.getExchanges();
+            assertFalse(recorded.isEmpty(), "Should have at least one exchange");
+            assertEquals("claude-sonnet-4.5", getRequestModel(recorded.get(0)));
+        }
+    }
 }