# Patch summary (commentary placed before the first "diff --git" header):
#
# src/strands/models/openai.py
#   stream() now takes streamed reasoning text from delta.reasoning_content
#   and, when that is missing or falsy, from delta.reasoning. Because the two
#   lookups are joined with `or`, reasoning_content wins when both are set.
#   The emitted chunk still uses the "reasoning_content" data type.
#
# tests/strands/models/test_openai.py
#   - Existing Mock deltas gain an explicit reasoning=None. Presumably this is
#     needed because unittest.mock.Mock auto-creates a truthy child mock for
#     any attribute that was not set, which the new getattr fallback would
#     otherwise pick up as reasoning text.
#   - Adds test_stream_vllm_reasoning_field (only delta.reasoning is set) and
#     test_stream_reasoning_content_takes_priority_over_reasoning (both set).
#
diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py
index c4be7d360..ffcab65e7 100644
--- a/src/strands/models/openai.py
+++ b/src/strands/models/openai.py
@@ -669,7 +669,11 @@ async def stream(
                     continue
                 choice = event.choices[0]
 
-                if hasattr(choice.delta, "reasoning_content") and choice.delta.reasoning_content:
+                # vLLM ≥0.19.1 renamed reasoning_content → reasoning; check both.
+                reasoning_text = getattr(choice.delta, "reasoning_content", None) or getattr(
+                    choice.delta, "reasoning", None
+                )
+                if reasoning_text:
                     chunks, data_type = self._stream_switch_content("reasoning_content", data_type)
                     for chunk in chunks:
                         yield chunk
@@ -677,7 +681,7 @@ async def stream(
                         {
                             "chunk_type": "content_delta",
                             "data_type": data_type,
-                            "data": choice.delta.reasoning_content,
+                            "data": reasoning_text,
                         }
                     )
 
diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py
index 94e4caa3f..baf6c2fe0 100644
--- a/tests/strands/models/test_openai.py
+++ b/tests/strands/models/test_openai.py
@@ -906,25 +906,33 @@ async def test_stream(openai_client, model_id, model, agenerator, alist):
     mock_tool_call_2_part_1 = unittest.mock.Mock(index=1)
     mock_delta_1 = unittest.mock.Mock(
         reasoning_content="",
+        reasoning=None,
         content=None,
         tool_calls=None,
     )
     mock_delta_2 = unittest.mock.Mock(
         reasoning_content="\nI'm thinking",
+        reasoning=None,
         content=None,
         tool_calls=None,
     )
     mock_delta_3 = unittest.mock.Mock(
-        content="I'll calculate", tool_calls=[mock_tool_call_1_part_1, mock_tool_call_2_part_1], reasoning_content=None
+        content="I'll calculate",
+        tool_calls=[mock_tool_call_1_part_1, mock_tool_call_2_part_1],
+        reasoning_content=None,
+        reasoning=None,
     )
 
     mock_tool_call_1_part_2 = unittest.mock.Mock(index=0)
     mock_tool_call_2_part_2 = unittest.mock.Mock(index=1)
     mock_delta_4 = unittest.mock.Mock(
-        content="that for you", tool_calls=[mock_tool_call_1_part_2, mock_tool_call_2_part_2], reasoning_content=None
+        content="that for you",
+        tool_calls=[mock_tool_call_1_part_2, mock_tool_call_2_part_2],
+        reasoning_content=None,
+        reasoning=None,
     )
 
-    mock_delta_5 = unittest.mock.Mock(content="", tool_calls=None, reasoning_content=None)
+    mock_delta_5 = unittest.mock.Mock(content="", tool_calls=None, reasoning_content=None, reasoning=None)
 
     mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_1)])
     mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_2)])
@@ -995,9 +1003,76 @@ async def test_stream(openai_client, model_id, model, agenerator, alist):
     openai_client.chat.completions.create.assert_called_once_with(**expected_request)
 
 
+@pytest.mark.asyncio
+async def test_stream_vllm_reasoning_field(openai_client, model_id, model, agenerator, alist):
+    """vLLM >=0.19.1 emits delta.reasoning instead of delta.reasoning_content."""
+    mock_delta_1 = unittest.mock.Mock(content=None, tool_calls=None, reasoning_content=None, reasoning="thinking...")
+    mock_delta_2 = unittest.mock.Mock(content="done", tool_calls=None, reasoning_content=None, reasoning=None)
+    mock_usage = unittest.mock.Mock(prompt_tokens=5, completion_tokens=5, total_tokens=10, prompt_tokens_details=None)
+
+    mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_1)])
+    mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_2)])
+    mock_event_3 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta_2)])
+    mock_event_4 = unittest.mock.Mock(usage=mock_usage)
+
+    openai_client.chat.completions.create = unittest.mock.AsyncMock(
+        return_value=agenerator([mock_event_1, mock_event_2, mock_event_3, mock_event_4])
+    )
+
+    messages = [{"role": "user", "content": [{"text": "hi"}]}]
+    tru_events = await alist(model.stream(messages))
+    exp_events = [
+        {"messageStart": {"role": "assistant"}},
+        {"contentBlockStart": {"start": {}}},
+        {"contentBlockDelta": {"delta": {"reasoningContent": {"text": "thinking..."}}}},
+        {"contentBlockStop": {}},
+        {"contentBlockStart": {"start": {}}},
+        {"contentBlockDelta": {"delta": {"text": "done"}}},
+        {"contentBlockDelta": {"delta": {"text": "done"}}},
+        {"contentBlockStop": {}},
+        {"messageStop": {"stopReason": "end_turn"}},
+        {
+            "metadata": {
+                "usage": {"inputTokens": 5, "outputTokens": 5, "totalTokens": 10},
+                "metrics": {"latencyMs": 0},
+            }
+        },
+    ]
+
+    assert tru_events == exp_events
+
+
+@pytest.mark.asyncio
+async def test_stream_reasoning_content_takes_priority_over_reasoning(openai_client, model_id, model, agenerator, alist):
+    """reasoning_content takes priority when both fields are present (backward compat)."""
+    mock_delta_1 = unittest.mock.Mock(
+        content=None, tool_calls=None, reasoning_content="from_reasoning_content", reasoning="from_reasoning"
+    )
+    mock_delta_2 = unittest.mock.Mock(content="done", tool_calls=None, reasoning_content=None, reasoning=None)
+    mock_usage = unittest.mock.Mock(prompt_tokens=5, completion_tokens=5, total_tokens=10, prompt_tokens_details=None)
+
+    mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_1)])
+    mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_2)])
+    mock_event_3 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta_2)])
+    mock_event_4 = unittest.mock.Mock(usage=mock_usage)
+
+    openai_client.chat.completions.create = unittest.mock.AsyncMock(
+        return_value=agenerator([mock_event_1, mock_event_2, mock_event_3, mock_event_4])
+    )
+
+    messages = [{"role": "user", "content": [{"text": "hi"}]}]
+    tru_events = await alist(model.stream(messages))
+
+    reasoning_deltas = [
+        e for e in tru_events if "contentBlockDelta" in e and "reasoningContent" in e["contentBlockDelta"]["delta"]
+    ]
+    assert len(reasoning_deltas) == 1
+    assert reasoning_deltas[0]["contentBlockDelta"]["delta"]["reasoningContent"]["text"] == "from_reasoning_content"
+
+
 @pytest.mark.asyncio
 async def test_stream_empty(openai_client, model_id, model, agenerator, alist):
-    mock_delta = unittest.mock.Mock(content=None, tool_calls=None, reasoning_content=None)
+    mock_delta = unittest.mock.Mock(content=None, tool_calls=None, reasoning_content=None, reasoning=None)
 
     mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta)])
     mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta)])
@@ -1031,7 +1106,7 @@ async def test_stream_empty(openai_client, model_id, model, agenerator, alist):
 
 @pytest.mark.asyncio
 async def test_stream_with_empty_choices(openai_client, model, agenerator, alist):
-    mock_delta = unittest.mock.Mock(content="content", tool_calls=None, reasoning_content=None)
+    mock_delta = unittest.mock.Mock(content="content", tool_calls=None, reasoning_content=None, reasoning=None)
     mock_usage = unittest.mock.Mock(prompt_tokens=10, completion_tokens=20, total_tokens=30)
 
     # Event with no choices attribute
@@ -1462,7 +1537,7 @@ async def test_stream_with_injected_client(model_id, agenerator, alist):
     mock_injected_client = unittest.mock.AsyncMock()
     mock_injected_client.close = unittest.mock.AsyncMock()
 
-    mock_delta = unittest.mock.Mock(content="Hello", tool_calls=None, reasoning_content=None)
+    mock_delta = unittest.mock.Mock(content="Hello", tool_calls=None, reasoning_content=None, reasoning=None)
     mock_event_1 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta)])
     mock_event_2 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta)])
     mock_event_3 = unittest.mock.Mock()