12 changes: 12 additions & 0 deletions crates/openshell-router/src/backend.rs
@@ -518,6 +518,10 @@ fn build_backend_url(endpoint: &str, path: &str) -> String {
        return format!("{base}{}", &path[3..]);
    }

    // The Codex backend base is not versioned, so strip the `/v1` prefix
    // before appending the rest of the path.
    if path == "/v1/codex" || path.starts_with("/v1/codex/") {
        return format!("{base}{}", &path[3..]);
    }

    format!("{base}{path}")
}

@@ -553,6 +557,14 @@ mod tests {
        );
    }

    #[test]
    fn build_backend_url_strips_v1_for_codex_backend() {
        assert_eq!(
            build_backend_url("https://chatgpt.com/backend-api", "/v1/codex/responses"),
            "https://chatgpt.com/backend-api/codex/responses"
        );
    }

    fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
        ResolvedRoute {
            name: "inference.local".to_string(),
45 changes: 45 additions & 0 deletions crates/openshell-router/tests/backend_integration.rs
@@ -108,6 +108,51 @@ async fn proxy_upstream_401_returns_error() {
    assert_eq!(response.status, 401);
}

#[tokio::test]
async fn proxy_strips_v1_before_forwarding_codex_to_non_versioned_base() {
    let mock_server = MockServer::start().await;

    Mock::given(method("POST"))
        .and(path("/codex/responses"))
        .and(bearer_token("test-api-key"))
        .respond_with(ResponseTemplate::new(200).set_body_string("{}"))
        .mount(&mock_server)
        .await;

    let router = Router::new().unwrap();
    let candidates = vec![ResolvedRoute {
        name: "inference.local".to_string(),
        endpoint: mock_server.uri(),
        model: "gpt-5.4".to_string(),
        api_key: "test-api-key".to_string(),
        protocols: vec!["openai_responses".to_string()],
        auth: AuthHeader::Bearer,
        default_headers: Vec::new(),
        passthrough_headers: Vec::new(),
        timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT,
    }];

    let body = serde_json::to_vec(&serde_json::json!({
        "model": "gpt-5.4",
        "input": "Hello"
    }))
    .unwrap();

    let response = router
        .proxy_with_candidates(
            "openai_responses",
            "POST",
            "/v1/codex/responses",
            vec![("content-type".to_string(), "application/json".to_string())],
            bytes::Bytes::from(body),
            &candidates,
        )
        .await
        .unwrap();

    assert_eq!(response.status, 200);
}

#[tokio::test]
async fn proxy_no_compatible_route_returns_error() {
    let router = Router::new().unwrap();
14 changes: 14 additions & 0 deletions crates/openshell-sandbox/src/l7/inference.rs
@@ -37,6 +37,12 @@ pub fn default_patterns() -> Vec<InferenceApiPattern> {
            protocol: "openai_responses".to_string(),
            kind: "responses".to_string(),
        },
        InferenceApiPattern {
            method: "POST".to_string(),
            path_glob: "/v1/codex/*".to_string(),
            protocol: "openai_responses".to_string(),
            kind: "codex_responses".to_string(),
        },
        InferenceApiPattern {
            method: "POST".to_string(),
            path_glob: "/v1/messages".to_string(),
@@ -399,6 +405,14 @@ mod tests {
        assert_eq!(result.unwrap().protocol, "openai_responses");
    }

    #[test]
    fn detect_codex_responses() {
        let patterns = default_patterns();
        let result = detect_inference_pattern("POST", "/v1/codex/responses", &patterns);
        assert!(result.is_some());
        assert_eq!(result.unwrap().protocol, "openai_responses");
    }

    #[test]
    fn detect_anthropic_messages() {
        let patterns = default_patterns();
1 change: 1 addition & 0 deletions docs/inference/about.mdx
@@ -43,6 +43,7 @@ Supported request patterns depend on the provider configured for `inference.local`
| Chat Completions | `POST` | `/v1/chat/completions` |
| Completions | `POST` | `/v1/completions` |
| Responses | `POST` | `/v1/responses` |
| OpenAI OAuth Codex | `POST` | `/v1/codex/*` |
| Model Discovery | `GET` | `/v1/models` |
| Model Discovery | `GET` | `/v1/models/*` |

2 changes: 2 additions & 0 deletions docs/inference/configure.mdx
@@ -158,6 +158,8 @@ The client-supplied `model` and `api_key` values are not sent upstream. The priv

Some SDKs require a non-empty API key even though `inference.local` does not use the sandbox-provided value. In those cases, pass any placeholder such as `test` or `unused`.

OpenAI OAuth Codex clients can also use the local endpoint. If the Codex client appends `/codex/responses` to the configured base URL, set the base URL to `https://inference.local/v1`. OpenShell matches `/v1/codex/*`, strips the `/v1` prefix, and forwards those requests to upstream backends that do not expose one.
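
For illustration, here is a minimal sketch of such a request, assuming a `reqwest`-based client and that `https://inference.local` resolves from inside the sandbox; the crate choice and literal values are illustrative rather than part of this change:

```rust
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // The base URL points at the local gateway; the Codex client appends /codex/responses.
    let base_url = "https://inference.local/v1";

    let response = reqwest::Client::new()
        .post(format!("{base_url}/codex/responses"))
        // Placeholder key: inference.local does not use the client-supplied value.
        .bearer_auth("unused")
        .json(&json!({ "model": "gpt-5.4", "input": "Hello" }))
        .send()
        .await?;

    // The gateway rewrites /v1/codex/responses to /codex/responses before forwarding upstream.
    println!("status: {}", response.status());
    Ok(())
}
```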

Use this endpoint when inference should stay local to the host for privacy and security reasons. External providers that should be reached directly belong in `network_policies` instead.

When the upstream runs on the same machine as the gateway, bind it to `0.0.0.0` and point the provider at `host.openshell.internal` or the host's LAN IP. `127.0.0.1` and `localhost` usually fail because the request originates from the gateway or sandbox runtime, not from your shell.
1 change: 1 addition & 0 deletions examples/local-inference/README.md
@@ -163,4 +163,5 @@ In cluster mode, use `openshell cluster inference set` instead.
| `POST /v1/chat/completions` | `openai_chat_completions` | Chat completion |
| `POST /v1/completions` | `openai_completions` | Text completion |
| `POST /v1/responses` | `openai_responses` | Responses API |
| `POST /v1/codex/*` | `openai_responses` | OpenAI OAuth Codex responses |
| `POST /v1/messages` | `anthropic_messages` | Anthropic messages |