12 changes: 12 additions & 0 deletions crates/openshell-router/src/backend.rs
@@ -518,6 +518,10 @@ fn build_backend_url(endpoint: &str, path: &str) -> String {
        return format!("{base}{}", &path[3..]);
    }

    // The Codex backend base is not versioned, so strip the `/v1` prefix
    // before appending the rest of the path.
    if path == "/v1/codex" || path.starts_with("/v1/codex/") {
        return format!("{base}{}", &path[3..]);
    }

    format!("{base}{path}")
}

@@ -553,6 +557,14 @@ mod tests {
        );
    }

    #[test]
    fn build_backend_url_strips_v1_for_codex_backend() {
        assert_eq!(
            build_backend_url("https://chatgpt.com/backend-api", "/v1/codex/responses"),
            "https://chatgpt.com/backend-api/codex/responses"
        );
    }

    fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
        ResolvedRoute {
            name: "inference.local".to_string(),
45 changes: 45 additions & 0 deletions crates/openshell-router/tests/backend_integration.rs
@@ -108,6 +108,51 @@ async fn proxy_upstream_401_returns_error() {
    assert_eq!(response.status, 401);
}

#[tokio::test]
async fn proxy_strips_v1_before_forwarding_codex_to_non_versioned_base() {
    let mock_server = MockServer::start().await;

    Mock::given(method("POST"))
        .and(path("/codex/responses"))
        .and(bearer_token("test-api-key"))
        .respond_with(ResponseTemplate::new(200).set_body_string("{}"))
        .mount(&mock_server)
        .await;

    let router = Router::new().unwrap();
    let candidates = vec![ResolvedRoute {
        name: "inference.local".to_string(),
        endpoint: mock_server.uri(),
        model: "gpt-5.4".to_string(),
        api_key: "test-api-key".to_string(),
        protocols: vec!["openai_responses".to_string()],
        auth: AuthHeader::Bearer,
        default_headers: Vec::new(),
        passthrough_headers: Vec::new(),
        timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT,
    }];

    let body = serde_json::to_vec(&serde_json::json!({
        "model": "gpt-5.4",
        "input": "Hello"
    }))
    .unwrap();

    let response = router
        .proxy_with_candidates(
            "openai_responses",
            "POST",
            "/v1/codex/responses",
            vec![("content-type".to_string(), "application/json".to_string())],
            bytes::Bytes::from(body),
            &candidates,
        )
        .await
        .unwrap();

    assert_eq!(response.status, 200);
}

#[tokio::test]
async fn proxy_no_compatible_route_returns_error() {
    let router = Router::new().unwrap();
14 changes: 14 additions & 0 deletions crates/openshell-sandbox/src/l7/inference.rs
@@ -37,6 +37,12 @@ pub fn default_patterns() -> Vec<InferenceApiPattern> {
            protocol: "openai_responses".to_string(),
            kind: "responses".to_string(),
        },
        InferenceApiPattern {
            method: "POST".to_string(),
            path_glob: "/v1/codex/*".to_string(),
            protocol: "openai_responses".to_string(),
            kind: "codex_responses".to_string(),
        },
        InferenceApiPattern {
            method: "POST".to_string(),
            path_glob: "/v1/messages".to_string(),
@@ -399,6 +405,14 @@ mod tests {
        assert_eq!(result.unwrap().protocol, "openai_responses");
    }

    #[test]
    fn detect_codex_responses() {
        let patterns = default_patterns();
        let result = detect_inference_pattern("POST", "/v1/codex/responses", &patterns);
        assert!(result.is_some());
        assert_eq!(result.unwrap().protocol, "openai_responses");
    }

    #[test]
    fn detect_anthropic_messages() {
        let patterns = default_patterns();
1 change: 1 addition & 0 deletions docs/inference/about.mdx
@@ -43,6 +43,7 @@ Supported request patterns depend on the provider configured for `inference.local`
| Chat Completions | `POST` | `/v1/chat/completions` |
| Completions | `POST` | `/v1/completions` |
| Responses | `POST` | `/v1/responses` |
| OpenAI OAuth Codex | `POST` | `/v1/codex/*` |
| Model Discovery | `GET` | `/v1/models` |
| Model Discovery | `GET` | `/v1/models/*` |

2 changes: 2 additions & 0 deletions docs/inference/configure.mdx
@@ -158,6 +158,8 @@ The client-supplied `model` and `api_key` values are not sent upstream. The priv

Some SDKs require a non-empty API key even though `inference.local` does not use the sandbox-provided value. In those cases, pass any placeholder such as `test` or `unused`.

OpenAI OAuth Codex clients can also use the local endpoint. If the Codex client appends `/codex/responses` to the configured base URL, set the base URL to `https://inference.local/v1`. OpenShell matches `/v1/codex/*`, strips the `/v1` prefix, and forwards those requests to upstream backends that do not expose one.
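
For illustration, here is a minimal sketch of such a request, assuming a `reqwest`-based client and that `https://inference.local` resolves from inside the sandbox; the crate choice and literal values are illustrative rather than part of this change:

```rust
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // The base URL points at the local gateway; the Codex client appends /codex/responses.
    let base_url = "https://inference.local/v1";

    let response = reqwest::Client::new()
        .post(format!("{base_url}/codex/responses"))
        // Placeholder key: inference.local does not use the client-supplied value.
        .bearer_auth("unused")
        .json(&json!({ "model": "gpt-5.4", "input": "Hello" }))
        .send()
        .await?;

    // The gateway rewrites /v1/codex/responses to /codex/responses before forwarding upstream.
    println!("status: {}", response.status());
    Ok(())
}
```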

Use this endpoint when inference should stay local to the host for privacy and security reasons. External providers that should be reached directly belong in `network_policies` instead.

When the upstream runs on the same machine as the gateway, bind it to `0.0.0.0` and point the provider at `host.openshell.internal` or the host's LAN IP. `127.0.0.1` and `localhost` usually fail because the request originates from the gateway or sandbox runtime, not from your shell.
1 change: 1 addition & 0 deletions examples/local-inference/README.md
@@ -163,4 +163,5 @@ In cluster mode, use `openshell cluster inference set` instead.
| `POST /v1/chat/completions` | `openai_chat_completions` | Chat completion |
| `POST /v1/completions` | `openai_completions` | Text completion |
| `POST /v1/responses` | `openai_responses` | Responses API |
| `POST /v1/codex/*` | `openai_responses` | OpenAI OAuth Codex responses |
| `POST /v1/messages` | `anthropic_messages` | Anthropic messages |