Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/strands/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,15 +669,19 @@ async def stream(
continue
choice = event.choices[0]

if hasattr(choice.delta, "reasoning_content") and choice.delta.reasoning_content:
# vLLM ≥0.19.1 renamed reasoning_content → reasoning; check both.
reasoning_text = getattr(choice.delta, "reasoning_content", None) or getattr(
choice.delta, "reasoning", None
)
if reasoning_text:
chunks, data_type = self._stream_switch_content("reasoning_content", data_type)
for chunk in chunks:
yield chunk
yield self.format_chunk(
{
"chunk_type": "content_delta",
"data_type": data_type,
"data": choice.delta.reasoning_content,
"data": reasoning_text,
}
)

Expand Down
87 changes: 81 additions & 6 deletions tests/strands/models/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,25 +906,33 @@ async def test_stream(openai_client, model_id, model, agenerator, alist):
mock_tool_call_2_part_1 = unittest.mock.Mock(index=1)
mock_delta_1 = unittest.mock.Mock(
reasoning_content="",
reasoning=None,
content=None,
tool_calls=None,
)
mock_delta_2 = unittest.mock.Mock(
reasoning_content="\nI'm thinking",
reasoning=None,
content=None,
tool_calls=None,
)
mock_delta_3 = unittest.mock.Mock(
content="I'll calculate", tool_calls=[mock_tool_call_1_part_1, mock_tool_call_2_part_1], reasoning_content=None
content="I'll calculate",
tool_calls=[mock_tool_call_1_part_1, mock_tool_call_2_part_1],
reasoning_content=None,
reasoning=None,
)

mock_tool_call_1_part_2 = unittest.mock.Mock(index=0)
mock_tool_call_2_part_2 = unittest.mock.Mock(index=1)
mock_delta_4 = unittest.mock.Mock(
content="that for you", tool_calls=[mock_tool_call_1_part_2, mock_tool_call_2_part_2], reasoning_content=None
content="that for you",
tool_calls=[mock_tool_call_1_part_2, mock_tool_call_2_part_2],
reasoning_content=None,
reasoning=None,
)

mock_delta_5 = unittest.mock.Mock(content="", tool_calls=None, reasoning_content=None)
mock_delta_5 = unittest.mock.Mock(content="", tool_calls=None, reasoning_content=None, reasoning=None)

mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_1)])
mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_2)])
Expand Down Expand Up @@ -995,9 +1003,76 @@ async def test_stream(openai_client, model_id, model, agenerator, alist):
openai_client.chat.completions.create.assert_called_once_with(**expected_request)


@pytest.mark.asyncio
async def test_stream_vllm_reasoning_field(openai_client, model_id, model, agenerator, alist):
    """Reasoning text carried only in ``delta.reasoning`` is streamed as reasoning content.

    vLLM >=0.19.1 emits ``delta.reasoning`` instead of ``delta.reasoning_content``,
    so this stream leaves ``reasoning_content`` unset on every delta.
    """

    def choice_event(delta, finish_reason=None):
        # Wrap a delta in the single-choice event shape the stream is fed in this test.
        return unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=finish_reason, delta=delta)])

    reasoning_only_delta = unittest.mock.Mock(
        content=None,
        tool_calls=None,
        reasoning_content=None,
        reasoning="thinking...",
    )
    text_delta = unittest.mock.Mock(
        content="done",
        tool_calls=None,
        reasoning_content=None,
        reasoning=None,
    )
    usage = unittest.mock.Mock(
        prompt_tokens=5,
        completion_tokens=5,
        total_tokens=10,
        prompt_tokens_details=None,
    )

    # The "stop" event reuses the text delta; the trailing event carries only usage.
    stream_events = [
        choice_event(reasoning_only_delta),
        choice_event(text_delta),
        choice_event(text_delta, finish_reason="stop"),
        unittest.mock.Mock(usage=usage),
    ]
    openai_client.chat.completions.create = unittest.mock.AsyncMock(return_value=agenerator(stream_events))

    actual_events = await alist(model.stream([{"role": "user", "content": [{"text": "hi"}]}]))

    reasoning_block = [
        {"contentBlockStart": {"start": {}}},
        {"contentBlockDelta": {"delta": {"reasoningContent": {"text": "thinking..."}}}},
        {"contentBlockStop": {}},
    ]
    # "done" is expected twice because two of the choice events share the same text delta.
    text_block = [
        {"contentBlockStart": {"start": {}}},
        {"contentBlockDelta": {"delta": {"text": "done"}}},
        {"contentBlockDelta": {"delta": {"text": "done"}}},
        {"contentBlockStop": {}},
    ]
    metadata_event = {
        "metadata": {
            "usage": {"inputTokens": 5, "outputTokens": 5, "totalTokens": 10},
            "metrics": {"latencyMs": 0},
        }
    }
    expected_events = [
        {"messageStart": {"role": "assistant"}},
        *reasoning_block,
        *text_block,
        {"messageStop": {"stopReason": "end_turn"}},
        metadata_event,
    ]

    assert actual_events == expected_events


@pytest.mark.asyncio
async def test_stream_reasoning_content_takes_priority_over_reasoning(openai_client, model_id, model, agenerator, alist):
    """When a delta sets both fields, the ``reasoning_content`` value is the one streamed.

    Guards backward compatibility: ``reasoning`` must not override ``reasoning_content``
    when both are present.
    """

    def choice_event(delta, finish_reason=None):
        # Wrap a delta in the single-choice event shape the stream is fed in this test.
        return unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=finish_reason, delta=delta)])

    both_fields_delta = unittest.mock.Mock(
        content=None,
        tool_calls=None,
        reasoning_content="from_reasoning_content",
        reasoning="from_reasoning",
    )
    text_delta = unittest.mock.Mock(
        content="done",
        tool_calls=None,
        reasoning_content=None,
        reasoning=None,
    )
    usage = unittest.mock.Mock(
        prompt_tokens=5,
        completion_tokens=5,
        total_tokens=10,
        prompt_tokens_details=None,
    )

    stream_events = [
        choice_event(both_fields_delta),
        choice_event(text_delta),
        choice_event(text_delta, finish_reason="stop"),
        unittest.mock.Mock(usage=usage),
    ]
    openai_client.chat.completions.create = unittest.mock.AsyncMock(return_value=agenerator(stream_events))

    streamed_events = await alist(model.stream([{"role": "user", "content": [{"text": "hi"}]}]))

    # Collect only the content-block deltas that carry reasoning text.
    reasoning_deltas = []
    for event in streamed_events:
        if "contentBlockDelta" not in event:
            continue
        if "reasoningContent" in event["contentBlockDelta"]["delta"]:
            reasoning_deltas.append(event)

    assert len(reasoning_deltas) == 1
    assert reasoning_deltas[0]["contentBlockDelta"]["delta"]["reasoningContent"]["text"] == "from_reasoning_content"


@pytest.mark.asyncio
async def test_stream_empty(openai_client, model_id, model, agenerator, alist):
mock_delta = unittest.mock.Mock(content=None, tool_calls=None, reasoning_content=None)
mock_delta = unittest.mock.Mock(content=None, tool_calls=None, reasoning_content=None, reasoning=None)

mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta)])
mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta)])
Expand Down Expand Up @@ -1031,7 +1106,7 @@ async def test_stream_empty(openai_client, model_id, model, agenerator, alist):

@pytest.mark.asyncio
async def test_stream_with_empty_choices(openai_client, model, agenerator, alist):
mock_delta = unittest.mock.Mock(content="content", tool_calls=None, reasoning_content=None)
mock_delta = unittest.mock.Mock(content="content", tool_calls=None, reasoning_content=None, reasoning=None)
mock_usage = unittest.mock.Mock(prompt_tokens=10, completion_tokens=20, total_tokens=30)

# Event with no choices attribute
Expand Down Expand Up @@ -1462,7 +1537,7 @@ async def test_stream_with_injected_client(model_id, agenerator, alist):
mock_injected_client = unittest.mock.AsyncMock()
mock_injected_client.close = unittest.mock.AsyncMock()

mock_delta = unittest.mock.Mock(content="Hello", tool_calls=None, reasoning_content=None)
mock_delta = unittest.mock.Mock(content="Hello", tool_calls=None, reasoning_content=None, reasoning=None)
mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta)])
mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta)])
mock_event_3 = unittest.mock.Mock()
Expand Down