diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs
index fbca70ae1..2e7ef3fbc 100644
--- a/crates/openshell-router/src/backend.rs
+++ b/crates/openshell-router/src/backend.rs
@@ -518,6 +518,10 @@ fn build_backend_url(endpoint: &str, path: &str) -> String {
         return format!("{base}{}", &path[3..]);
     }
 
+    if path == "/v1/codex" || path.starts_with("/v1/codex/") {
+        return format!("{base}{}", &path[3..]);
+    }
+
     format!("{base}{path}")
 }
 
@@ -553,6 +557,14 @@ mod tests {
         );
     }
 
+    #[test]
+    fn build_backend_url_strips_v1_for_codex_backend() {
+        assert_eq!(
+            build_backend_url("https://chatgpt.com/backend-api", "/v1/codex/responses"),
+            "https://chatgpt.com/backend-api/codex/responses"
+        );
+    }
+
     fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
         ResolvedRoute {
             name: "inference.local".to_string(),
diff --git a/crates/openshell-router/tests/backend_integration.rs b/crates/openshell-router/tests/backend_integration.rs
index 6b21de94d..ab227f449 100644
--- a/crates/openshell-router/tests/backend_integration.rs
+++ b/crates/openshell-router/tests/backend_integration.rs
@@ -108,6 +108,51 @@ async fn proxy_upstream_401_returns_error() {
     assert_eq!(response.status, 401);
 }
 
+#[tokio::test]
+async fn proxy_strips_v1_before_forwarding_codex_to_non_versioned_base() {
+    let mock_server = MockServer::start().await;
+
+    Mock::given(method("POST"))
+        .and(path("/codex/responses"))
+        .and(bearer_token("test-api-key"))
+        .respond_with(ResponseTemplate::new(200).set_body_string("{}"))
+        .mount(&mock_server)
+        .await;
+
+    let router = Router::new().unwrap();
+    let candidates = vec![ResolvedRoute {
+        name: "inference.local".to_string(),
+        endpoint: mock_server.uri(),
+        model: "gpt-5.4".to_string(),
+        api_key: "test-api-key".to_string(),
+        protocols: vec!["openai_responses".to_string()],
+        auth: AuthHeader::Bearer,
+        default_headers: Vec::new(),
+        passthrough_headers: Vec::new(),
+        timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT,
+    }];
+
+    let body = serde_json::to_vec(&serde_json::json!({
+        "model": "gpt-5.4",
+        "input": "Hello"
+    }))
+    .unwrap();
+
+    let response = router
+        .proxy_with_candidates(
+            "openai_responses",
+            "POST",
+            "/v1/codex/responses",
+            vec![("content-type".to_string(), "application/json".to_string())],
+            bytes::Bytes::from(body),
+            &candidates,
+        )
+        .await
+        .unwrap();
+
+    assert_eq!(response.status, 200);
+}
+
 #[tokio::test]
 async fn proxy_no_compatible_route_returns_error() {
     let router = Router::new().unwrap();
diff --git a/crates/openshell-sandbox/src/l7/inference.rs b/crates/openshell-sandbox/src/l7/inference.rs
index acda0bb36..0d9df64b0 100644
--- a/crates/openshell-sandbox/src/l7/inference.rs
+++ b/crates/openshell-sandbox/src/l7/inference.rs
@@ -37,6 +37,12 @@ pub fn default_patterns() -> Vec<InferenceApiPattern> {
             protocol: "openai_responses".to_string(),
             kind: "responses".to_string(),
         },
+        InferenceApiPattern {
+            method: "POST".to_string(),
+            path_glob: "/v1/codex/*".to_string(),
+            protocol: "openai_responses".to_string(),
+            kind: "codex_responses".to_string(),
+        },
         InferenceApiPattern {
             method: "POST".to_string(),
             path_glob: "/v1/messages".to_string(),
@@ -399,6 +405,14 @@ mod tests {
         assert_eq!(result.unwrap().protocol, "openai_responses");
     }
 
+    #[test]
+    fn detect_codex_responses() {
+        let patterns = default_patterns();
+        let result = detect_inference_pattern("POST", "/v1/codex/responses", &patterns);
+        assert!(result.is_some());
+        assert_eq!(result.unwrap().protocol, "openai_responses");
+    }
+
     #[test]
     fn detect_anthropic_messages() {
         let patterns = default_patterns();
diff --git a/docs/inference/about.mdx b/docs/inference/about.mdx
index 97f951e9b..2a1ac3d82 100644
--- a/docs/inference/about.mdx
+++ b/docs/inference/about.mdx
@@ -43,6 +43,7 @@ Supported request patterns depend on the provider configured for `inference.loca
 | Chat Completions | `POST` | `/v1/chat/completions` |
 | Completions | `POST` | `/v1/completions` |
 | Responses | `POST` | `/v1/responses` |
+| OpenAI OAuth Codex | `POST` | `/v1/codex/*` |
 | Model Discovery | `GET` | `/v1/models` |
 | Model Discovery | `GET` | `/v1/models/*` |
 
diff --git a/docs/inference/configure.mdx b/docs/inference/configure.mdx
index e1ce8b90d..03ab9e2b7 100644
--- a/docs/inference/configure.mdx
+++ b/docs/inference/configure.mdx
@@ -158,6 +158,8 @@ The client-supplied `model` and `api_key` values are not sent upstream. The priv
 
 Some SDKs require a non-empty API key even though `inference.local` does not use the sandbox-provided value. In those cases, pass any placeholder such as `test` or `unused`.
 
+OpenAI OAuth Codex clients can also use the local endpoint. If the Codex client appends `/codex/responses` to the configured base URL, set the base URL to `https://inference.local/v1`. OpenShell matches `/v1/codex/*` and forwards those requests to upstream backends that do not expose a `/v1` prefix.
+
 Use this endpoint when inference should stay local to the host for privacy and security reasons. External providers that should be reached directly belong in `network_policies` instead.
 
 When the upstream runs on the same machine as the gateway, bind it to `0.0.0.0` and point the provider at `host.openshell.internal` or the host's LAN IP. `127.0.0.1` and `localhost` usually fail because the request originates from the gateway or sandbox runtime, not from your shell.
diff --git a/examples/local-inference/README.md b/examples/local-inference/README.md
index 62e056e35..6abf01ef9 100644
--- a/examples/local-inference/README.md
+++ b/examples/local-inference/README.md
@@ -163,4 +163,5 @@ In cluster mode, use `openshell cluster inference set` instead.
 | `POST /v1/chat/completions` | `openai_chat_completions` | Chat completion |
 | `POST /v1/completions` | `openai_completions` | Text completion |
 | `POST /v1/responses` | `openai_responses` | Responses API |
+| `POST /v1/codex/*` | `openai_responses` | OpenAI OAuth Codex responses |
 | `POST /v1/messages` | `anthropic_messages` | Anthropic messages |
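
For reference, the path rewrite the `build_backend_url` hunk introduces can be reproduced in isolation. The sketch below is illustrative only: `join_codex_path` is an invented name, and the trailing-slash trim is an assumption standing in for however the real helper derives `base` outside the lines shown above; the expected URL comes from the `build_backend_url_strips_v1_for_codex_backend` unit test in the first hunk.

```rust
// Illustrative sketch, not the crate's actual helper (that lives in
// crates/openshell-router/src/backend.rs as `build_backend_url`).
fn join_codex_path(endpoint: &str, path: &str) -> String {
    // Assumption: `base` is the endpoint with any trailing slash removed.
    let base = endpoint.trim_end_matches('/');
    if path == "/v1/codex" || path.starts_with("/v1/codex/") {
        // "/v1" is three bytes, so &path[3..] keeps "/codex/..." and the
        // un-versioned upstream base receives "/codex/responses".
        return format!("{base}{}", &path[3..]);
    }
    format!("{base}{path}")
}

fn main() {
    // Mirrors the expectation asserted in the unit test added above.
    assert_eq!(
        join_codex_path("https://chatgpt.com/backend-api", "/v1/codex/responses"),
        "https://chatgpt.com/backend-api/codex/responses"
    );
    println!("codex path rewrite ok");
}
```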