From 637201fe23dd77d1fcbaefbaa709387edeffe408 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 14:13:02 +0100 Subject: [PATCH 01/60] test(langchain): Consolidate available tools assertion --- .../integrations/langchain/test_langchain.py | 90 ++++--------------- 1 file changed, 17 insertions(+), 73 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 132da0a9a0..2c4db478ae 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -20,6 +20,7 @@ from langchain_core.runnables import RunnableConfig from langchain_core.language_models.chat_models import BaseChatModel + import sentry_sdk from sentry_sdk import start_transaction from sentry_sdk.integrations.langchain import ( @@ -304,14 +305,30 @@ def test_langchain_agent( assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] # Verify that available tools are always recorded regardless of PII settings + tools_found = False for chat_span in chat_spans: span_data = chat_span.get("data", {}) if SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in span_data: + tools_found = True tools_data = span_data[SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert tools_data is not None, ( "Available tools should always be recorded regardless of PII settings" ) + if isinstance(tools_data, str): + # If serialized as string, should contain tool name + assert "get_word_length" in tools_data + else: + # If still a list, verify structure + assert len(tools_data) >= 1 + names = [ + tool.get("name") for tool in tools_data if isinstance(tool, dict) + ] + assert "get_word_length" in names + + # Ensure we found at least one span with tools data + assert tools_found, "No spans found with tools data" + def test_langchain_error(sentry_init, capture_events): sentry_init( @@ -718,79 +735,6 @@ def test_langchain_callback_list_existing_callback(sentry_init): assert handler is sentry_callback -def test_tools_integration_in_spans(sentry_init, capture_events): - """Test that tools are properly set on spans in actual LangChain integration.""" - global llm_type - llm_type = "openai-chat" - - sentry_init( - integrations=[LangchainIntegration(include_prompts=False)], - traces_sample_rate=1.0, - ) - events = capture_events() - - prompt = ChatPromptTemplate.from_messages( - [ - ("system", "You are a helpful assistant"), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ] - ) - - global stream_result_mock - stream_result_mock = Mock( - side_effect=[ - [ - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk(content="Simple response"), - ), - ] - ] - ) - - llm = MockOpenAI( - model_name="gpt-3.5-turbo", - temperature=0, - openai_api_key="badkey", - ) - agent = create_openai_tools_agent(llm, [get_word_length], prompt) - agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - - with start_transaction(): - list(agent_executor.stream({"input": "Hello"})) - - # Check that events were captured and contain tools data - if events: - tx = events[0] - spans = tx.get("spans", []) - - # Look for spans that should have tools data - tools_found = False - for span in spans: - span_data = span.get("data", {}) - if SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in span_data: - tools_found = True - tools_data = span_data[SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - # Verify tools are in the expected format - assert isinstance(tools_data, (str, list)) # Could be serialized - if isinstance(tools_data, str): - # If serialized as string, should contain tool name - assert "get_word_length" in tools_data - else: - # If still a list, verify structure - assert len(tools_data) >= 1 - names = [ - tool.get("name") - for tool in tools_data - if isinstance(tool, dict) - ] - assert "get_word_length" in names - - # Ensure we found at least one span with tools data - assert tools_found, "No spans found with tools data" - - def test_langchain_integration_with_langchain_core_only(sentry_init, capture_events): """Test that the langchain integration works when langchain.agents.AgentExecutor is not available or langchain is not installed, but langchain-core is. From d1d816088fb2456bab1cf7a34b5465b6d6e58351 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 14:13:32 +0100 Subject: [PATCH 02/60] whitespace change --- tests/integrations/langchain/test_langchain.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 2c4db478ae..3efb241d01 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -20,7 +20,6 @@ from langchain_core.runnables import RunnableConfig from langchain_core.language_models.chat_models import BaseChatModel - import sentry_sdk from sentry_sdk import start_transaction from sentry_sdk.integrations.langchain import ( From fbde33f82542741b23fc7d7bf5e37c4b018ccc3e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 14:17:16 +0100 Subject: [PATCH 03/60] test(langchain): Consolidate span origin assertion --- .../integrations/langchain/test_langchain.py | 122 +----------------- 1 file changed, 6 insertions(+), 116 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 3efb241d01..f91de78daa 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -214,11 +214,17 @@ def test_langchain_agent( tx = events[0] assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") assert len(chat_spans) == 2 + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 @@ -409,122 +415,6 @@ def test_span_status_error(sentry_init, capture_events): assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_origin(sentry_init, capture_events): - sentry_init( - integrations=[LangchainIntegration()], - traces_sample_rate=1.0, - ) - events = capture_events() - - prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - "You are very powerful assistant, but don't know current events", - ), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ] - ) - global stream_result_mock - stream_result_mock = Mock( - side_effect=[ - [ - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk( - content="", - additional_kwargs={ - "tool_calls": [ - { - "index": 0, - "id": "call_BbeyNhCKa6kYLYzrD40NGm3b", - "function": { - "arguments": "", - "name": "get_word_length", - }, - "type": "function", - } - ] - }, - ), - ), - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk( - content="", - additional_kwargs={ - "tool_calls": [ - { - "index": 0, - "id": None, - "function": { - "arguments": '{"word": "eudca"}', - "name": None, - }, - "type": None, - } - ] - }, - ), - ), - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk( - content="5", - usage_metadata={ - "input_tokens": 142, - "output_tokens": 50, - "total_tokens": 192, - "input_token_details": {"audio": 0, "cache_read": 0}, - "output_token_details": {"audio": 0, "reasoning": 0}, - }, - ), - generation_info={"finish_reason": "function_call"}, - ), - ], - [ - ChatGenerationChunk( - text="The word eudca has 5 letters.", - type="ChatGenerationChunk", - message=AIMessageChunk( - content="The word eudca has 5 letters.", - usage_metadata={ - "input_tokens": 89, - "output_tokens": 28, - "total_tokens": 117, - "input_token_details": {"audio": 0, "cache_read": 0}, - "output_token_details": {"audio": 0, "reasoning": 0}, - }, - ), - ), - ChatGenerationChunk( - type="ChatGenerationChunk", - generation_info={"finish_reason": "stop"}, - message=AIMessageChunk(content=""), - ), - ], - ] - ) - llm = MockOpenAI( - model_name="gpt-3.5-turbo", - temperature=0, - openai_api_key="badkey", - ) - agent = create_openai_tools_agent(llm, [get_word_length], prompt) - - agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - - with start_transaction(): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) - - (event,) = events - - assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.langchain" - - def test_manual_callback_no_duplication(sentry_init): """ Test that when a user manually provides a SentryLangchainCallback, From a4ed76088899e8b6442ab4f85cc596312e7b4b8f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 14:21:45 +0100 Subject: [PATCH 04/60] test(langchain): Replace mocks with httpx types --- tests/conftest.py | 8 +- .../integrations/langchain/test_langchain.py | 268 +++++++++++++----- 2 files changed, 202 insertions(+), 74 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7f76fc2aee..815ba02d2b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1018,10 +1018,14 @@ async def inner(values): @pytest.fixture def server_side_event_chunks(): - def inner(events): + def inner(events, include_event_type=True): for event in events: payload = event.model_dump() - chunk = f"event: {payload['type']}\ndata: {json.dumps(payload)}\n\n" + chunk = ( + f"event: {payload['type']}\ndata: {json.dumps(payload)}\n\n" + if include_event_type + else f"data: {json.dumps(payload)}\n\n" + ) yield chunk.encode("utf-8") return inner diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index f91de78daa..d880dd64a1 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -40,6 +40,21 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder +from openai.types.chat.chat_completion_chunk import ( + ChatCompletionChunk, + Choice, + ChoiceDelta, + ChoiceDeltaToolCall, + ChoiceDeltaToolCallFunction, +) + +from openai.types.completion_usage import ( + CompletionTokensDetails, + CompletionUsage, + PromptTokensDetails, +) + + @tool def get_word_length(word: str) -> int: """Returns the length of a word.""" @@ -95,6 +110,8 @@ def test_langchain_agent( use_unknown_llm_type, system_instructions_content, request, + get_model_response, + server_side_event_chunks, ): global llm_type llm_type = "acme-llm" if use_unknown_llm_type else "openai-chat" @@ -120,87 +137,189 @@ def test_langchain_agent( MessagesPlaceholder(variable_name="agent_scratchpad"), ] ) - global stream_result_mock - stream_result_mock = Mock( - side_effect=[ + + tool_response = get_model_response( + server_side_event_chunks( [ - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk( - content="", - additional_kwargs={ - "tool_calls": [ - { - "index": 0, - "id": "call_BbeyNhCKa6kYLYzrD40NGm3b", - "function": { - "arguments": "", - "name": "get_word_length", - }, - "type": "function", - } - ] - }, - ), + ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(role="assistant"), + finish_reason=None, + ), + ], ), - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk( - content="", - additional_kwargs={ - "tool_calls": [ - { - "index": 0, - "id": None, - "function": { - "arguments": '{"word": "eudca"}', - "name": None, - }, - "type": None, - } - ] - }, - ), + ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta( + tool_calls=[ + ChoiceDeltaToolCall( + index=0, + id="call_BbeyNhCKa6kYLYzrD40NGm3b", + type="function", + function=ChoiceDeltaToolCallFunction( + name="get_word_length", + arguments="", + ), + ), + ], + ), + finish_reason=None, + ), + ], ), - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk( - content="5", - usage_metadata={ - "input_tokens": 142, - "output_tokens": 50, - "total_tokens": 192, - "input_token_details": {"audio": 0, "cache_read": 0}, - "output_token_details": {"audio": 0, "reasoning": 0}, - }, + ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta( + tool_calls=[ + ChoiceDeltaToolCall( + index=0, + function=ChoiceDeltaToolCallFunction( + arguments='{"word": "eudca"}', + ), + ), + ], + ), + finish_reason=None, + ), + ], + ), + ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(content="5"), + finish_reason=None, + ), + ], + ), + ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(), + finish_reason="function_call", + ), + ], + ), + ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[], + usage=CompletionUsage( + prompt_tokens=142, + completion_tokens=50, + total_tokens=192, + prompt_tokens_details=PromptTokensDetails( + audio_tokens=0, + cached_tokens=0, + ), + completion_tokens_details=CompletionTokensDetails( + audio_tokens=0, + reasoning_tokens=0, + ), ), - generation_info={"finish_reason": "function_call"}, ), ], + include_event_type=False, + ) + ) + + final_response = get_model_response( + server_side_event_chunks( [ - ChatGenerationChunk( - text="The word eudca has 5 letters.", - type="ChatGenerationChunk", - message=AIMessageChunk( - content="The word eudca has 5 letters.", - usage_metadata={ - "input_tokens": 89, - "output_tokens": 28, - "total_tokens": 117, - "input_token_details": {"audio": 0, "cache_read": 0}, - "output_token_details": {"audio": 0, "reasoning": 0}, - }, - ), + ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(role="assistant"), + finish_reason=None, + ), + ], ), - ChatGenerationChunk( - type="ChatGenerationChunk", - generation_info={"finish_reason": "stop"}, - message=AIMessageChunk(content=""), + ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(content="The word eudca has 5 letters."), + finish_reason=None, + ), + ], + ), + ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(), + finish_reason="stop", + ), + ], + ), + ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[], + usage=CompletionUsage( + prompt_tokens=89, + completion_tokens=28, + total_tokens=117, + prompt_tokens_details=PromptTokensDetails( + audio_tokens=0, + cached_tokens=0, + ), + completion_tokens_details=CompletionTokensDetails( + audio_tokens=0, + reasoning_tokens=0, + ), + ), ), ], - ] + include_event_type=False, + ) ) - llm = MockOpenAI( + + llm = ChatOpenAI( model_name="gpt-3.5-turbo", temperature=0, openai_api_key="badkey", @@ -209,8 +328,13 @@ def test_langchain_agent( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with start_transaction(): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + with patch.object( + llm.root_client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + list(agent_executor.stream({"input": "How many letters in the word eudca"})) tx = events[0] assert tx["type"] == "transaction" From f8d42e96a2d583450b8d768ee9a46135c93cd1e9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 14:34:53 +0100 Subject: [PATCH 05/60] different function --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index d880dd64a1..d363b6a573 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -329,7 +329,7 @@ def test_langchain_agent( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) with patch.object( - llm.root_client._client, + llm.client._client._client, "send", side_effect=[tool_response, final_response], ) as _: From fccd72f883b8efc2cca35b7d10700b6dd5cc88d5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 14:39:13 +0100 Subject: [PATCH 06/60] simplify --- .../integrations/langchain/test_langchain.py | 24 ++----------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 3efb241d01..65e3073c5d 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -304,29 +304,9 @@ def test_langchain_agent( assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] # Verify that available tools are always recorded regardless of PII settings - tools_found = False for chat_span in chat_spans: - span_data = chat_span.get("data", {}) - if SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in span_data: - tools_found = True - tools_data = span_data[SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" - ) - - if isinstance(tools_data, str): - # If serialized as string, should contain tool name - assert "get_word_length" in tools_data - else: - # If still a list, verify structure - assert len(tools_data) >= 1 - names = [ - tool.get("name") for tool in tools_data if isinstance(tool, dict) - ] - assert "get_word_length" in names - - # Ensure we found at least one span with tools data - assert tools_found, "No spans found with tools data" + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert "get_word_length" in tools_data def test_langchain_error(sentry_init, capture_events): From 685f4f27c20bc2c8788ceea716c4c00c225964b4 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 15:45:31 +0100 Subject: [PATCH 07/60] test(langchain): Add basic agent test with Responses call --- .../integrations/langchain/test_langchain.py | 127 +++++++++++++++++- 1 file changed, 125 insertions(+), 2 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 155a8d3730..2e5144dd23 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -38,7 +38,8 @@ from langchain.agents import tool, AgentExecutor, create_openai_tools_agent from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder - +from langchain.agents import create_agent +from langchain_core.messages import HumanMessage, SystemMessage from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk, @@ -81,6 +82,128 @@ def _llm_type(self) -> str: return llm_type +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "blocks"], +) +def test_langchain_create_agent( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + nonstreaming_responses_model_response, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + model_repsonse = get_model_response( + nonstreaming_responses_model_response, + serialize_pydantic=True, + request_headers={ + "X-Stainless-Raw-Response": "True", + }, + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + use_responses_api=True, + ) + agent = create_agent( + model=llm, + tools=[get_word_length], + system_prompt=SystemMessage(content=system_instructions_content), + name="word_length_agent", + ) + + with patch.object( + llm.client._client._client, + "send", + return_value=model_repsonse, + ) as _: + with start_transaction(): + agent.invoke( + { + "messages": [ + HumanMessage(content="How many letters in the word eudca"), + ], + }, + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert chat_spans[0]["origin"] == "auto.ai.langchain" + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + + if send_default_pii and include_prompts: + assert ( + chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == "Hello, how can I help you?" + ) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + + @pytest.mark.parametrize( "send_default_pii, include_prompts, use_unknown_llm_type", [ @@ -102,7 +225,7 @@ def _llm_type(self) -> str: ], ids=["string", "list", "blocks"], ) -def test_langchain_agent( +def test_langchain_openai_tools_agent( sentry_init, capture_events, send_default_pii, From dc31e7f8c470e41ad6f3f102620dbf14ebe422a4 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 15:47:05 +0100 Subject: [PATCH 08/60] add conftest --- tests/conftest.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 815ba02d2b..5dd62931f1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,6 +48,12 @@ from sentry_sdk.transport import Transport from sentry_sdk.utils import reraise +try: + import openai +except ImportError: + openai = None + + from tests import _warning_recorder, _warning_recorder_mgr from typing import TYPE_CHECKING @@ -1033,10 +1039,11 @@ def inner(events, include_event_type=True): @pytest.fixture def get_model_response(): - def inner(response_content, serialize_pydantic=False): + def inner(response_content, serialize_pydantic=False, request_headers={}): model_request = HttpxRequest( "POST", "/responses", + headers=request_headers, ) if serialize_pydantic: @@ -1053,6 +1060,45 @@ def inner(response_content, serialize_pydantic=False): return inner +@pytest.fixture +def nonstreaming_responses_model_response(): + return openai.types.responses.Response( + id="resp_123", + output=[ + openai.types.responses.ResponseOutputMessage( + id="msg_123", + type="message", + status="completed", + content=[ + openai.types.responses.ResponseOutputText( + text="Hello, how can I help you?", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4", + object="response", + usage=openai.types.responses.ResponseUsage( + input_tokens=10, + input_tokens_details=openai.types.responses.response_usage.InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=openai.types.responses.response_usage.OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + + class MockServerRequestHandler(BaseHTTPRequestHandler): def do_GET(self): # noqa: N802 # Process an HTTP GET request and return a response. From ae4f8d34b45f456d353bc33eafbab9696992b9be Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 15:48:57 +0100 Subject: [PATCH 09/60] remove unused variable --- tests/integrations/langchain/test_langchain.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 155a8d3730..7815b514fa 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -82,12 +82,12 @@ def _llm_type(self) -> str: @pytest.mark.parametrize( - "send_default_pii, include_prompts, use_unknown_llm_type", + "send_default_pii, include_prompts", [ - (True, True, False), - (True, False, False), - (False, True, False), - (False, False, True), + (True, True), + (True, False), + (False, True), + (False, False), ], ) @pytest.mark.parametrize( @@ -107,15 +107,11 @@ def test_langchain_agent( capture_events, send_default_pii, include_prompts, - use_unknown_llm_type, system_instructions_content, request, get_model_response, server_side_event_chunks, ): - global llm_type - llm_type = "acme-llm" if use_unknown_llm_type else "openai-chat" - sentry_init( integrations=[ LangchainIntegration( From b693ad257827769e5979f69b0dc26aa5f2263666 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 16:09:33 +0100 Subject: [PATCH 10/60] fix undefined global --- tests/integrations/langchain/test_langchain.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7815b514fa..87720b7725 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -436,6 +436,9 @@ def test_langchain_agent( def test_langchain_error(sentry_init, capture_events): + global llm_type + llm_type = "acme-llm" + sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, From 014add2dda9ebe7cb6a32218d44f307203ef700e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 20 Mar 2026 16:24:57 +0100 Subject: [PATCH 11/60] simplify openai-agents --- .../openai_agents/test_openai_agents.py | 141 ++++++++---------- 1 file changed, 64 insertions(+), 77 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9edaa8501a..1442a2001b 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -102,45 +102,6 @@ def mock_usage(): ) -@pytest.fixture -def mock_model_response(): - return Response( - id="resp_123", - output=[ - ResponseOutputMessage( - id="msg_123", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Hello, how can I help you?", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=20, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=5, - ), - total_tokens=30, - ), - ) - - @pytest.fixture def test_agent(): """Create a real Agent instance for testing.""" @@ -198,13 +159,19 @@ def test_agent_custom_model(): @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( - sentry_init, capture_events, test_agent, mock_model_response, get_model_response + sentry_init, + capture_events, + test_agent, + nonstreaming_responses_model_response, + get_model_response, ): client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -340,7 +307,7 @@ async def test_agent_invocation_span( sentry_init, capture_events, test_agent_with_instructions, - mock_model_response, + nonstreaming_responses_model_response, instructions, input, request, @@ -353,7 +320,9 @@ async def test_agent_invocation_span( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent_with_instructions(instructions).clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -503,7 +472,7 @@ async def test_client_span_custom_model( sentry_init, capture_events, test_agent_custom_model, - mock_model_response, + nonstreaming_responses_model_response, get_model_response, ): """ @@ -514,7 +483,9 @@ async def test_client_span_custom_model( model = OpenAIResponsesModel(model="my-custom-model", openai_client=client) agent = test_agent_custom_model.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -547,7 +518,7 @@ def test_agent_invocation_span_sync_no_pii( sentry_init, capture_events, test_agent, - mock_model_response, + nonstreaming_responses_model_response, get_model_response, ): """ @@ -557,7 +528,9 @@ def test_agent_invocation_span_sync_no_pii( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -687,7 +660,7 @@ def test_agent_invocation_span_sync( sentry_init, capture_events, test_agent_with_instructions, - mock_model_response, + nonstreaming_responses_model_response, instructions, input, request, @@ -700,7 +673,9 @@ def test_agent_invocation_span_sync( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent_with_instructions(instructions).clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -1370,7 +1345,11 @@ def simple_test_tool(message: str) -> str: @pytest.mark.asyncio async def test_hosted_mcp_tool_propagation_header_streamed( - sentry_init, test_agent, async_iterator, server_side_event_chunks + sentry_init, + test_agent, + get_model_response, + async_iterator, + server_side_event_chunks, ): """ Test responses API is given trace propagation headers with HostedMCPTool. @@ -1402,11 +1381,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( release="d08ebdb9309e1b004c6f52202de58a09c2268e42", ) - request = httpx.Request( - "POST", - "/responses", - ) - + request_headers = {} # openai-agents calls with_streaming_response() if available starting with # https://github.com/openai/openai-agents-python/commit/159beb56130f7d85192acfd593c9168757984dc0. # When using with_streaming_response() the header set below changes the response type: @@ -1414,12 +1389,10 @@ async def test_hosted_mcp_tool_propagation_header_streamed( if parse_version(OPENAI_AGENTS_VERSION) >= (0, 10, 3) and hasattr( agent_with_tool.model._client.responses, "with_streaming_response" ): - request.headers["X-Stainless-Raw-Response"] = "stream" + request_headers["X-Stainless-Raw-Response"] = "stream" - response = httpx.Response( - 200, - request=request, - content=async_iterator( + response = get_model_response( + async_iterator( server_side_event_chunks( [ ResponseCreatedEvent( @@ -1478,6 +1451,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( ] ) ), + request_headers=request_headers, ) # Patching https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1604 @@ -2230,7 +2204,11 @@ async def test_mcp_tool_execution_without_pii( @pytest.mark.asyncio async def test_multiple_agents_asyncio( - sentry_init, capture_events, test_agent, mock_model_response, get_model_response + sentry_init, + capture_events, + test_agent, + nonstreaming_responses_model_response, + get_model_response, ): """ Test that multiple agents can be run at the same time in asyncio tasks @@ -2240,7 +2218,9 @@ async def test_multiple_agents_asyncio( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -3154,7 +3134,11 @@ async def test_streaming_span_update_captures_response_data( @pytest.mark.asyncio async def test_streaming_ttft_on_chat_span( - sentry_init, test_agent, async_iterator, server_side_event_chunks + sentry_init, + test_agent, + get_model_response, + async_iterator, + server_side_event_chunks, ): """ Test that time-to-first-token (TTFT) is recorded on chat spans during streaming. @@ -3182,11 +3166,7 @@ async def test_streaming_ttft_on_chat_span( traces_sample_rate=1.0, ) - request = httpx.Request( - "POST", - "/responses", - ) - + request_headers = {} # openai-agents calls with_streaming_response() if available starting with # https://github.com/openai/openai-agents-python/commit/159beb56130f7d85192acfd593c9168757984dc0. # When using with_streaming_response() the header set below changes the response type: @@ -3194,12 +3174,10 @@ async def test_streaming_ttft_on_chat_span( if parse_version(OPENAI_AGENTS_VERSION) >= (0, 10, 3) and hasattr( agent_with_tool.model._client.responses, "with_streaming_response" ): - request.headers["X-Stainless-Raw-Response"] = "stream" + request_headers["X-Stainless-Raw-Response"] = "stream" - response = httpx.Response( - 200, - request=request, - content=async_iterator( + response = get_model_response( + async_iterator( server_side_event_chunks( [ ResponseCreatedEvent( @@ -3276,6 +3254,7 @@ async def test_streaming_ttft_on_chat_span( ] ) ), + request_headers=request_headers, ) # Patching https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1604 @@ -3313,7 +3292,11 @@ async def test_streaming_ttft_on_chat_span( ) @pytest.mark.asyncio async def test_conversation_id_on_all_spans( - sentry_init, capture_events, test_agent, mock_model_response, get_model_response + sentry_init, + capture_events, + test_agent, + nonstreaming_responses_model_response, + get_model_response, ): """ Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). @@ -3323,7 +3306,9 @@ async def test_conversation_id_on_all_spans( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -3508,7 +3493,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init, capture_events, test_agent, - mock_model_response, + nonstreaming_responses_model_response, get_model_response, ): """ @@ -3519,7 +3504,9 @@ async def test_no_conversation_id_when_not_provided( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, From f4a8602a8fb7046bec26b2bc0594542f5d605208 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 08:34:42 +0100 Subject: [PATCH 12/60] remove types that have default values --- tests/integrations/langchain/test_langchain.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index fe34a342fc..99ee2b7851 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -51,9 +51,7 @@ ) from openai.types.completion_usage import ( - CompletionTokensDetails, CompletionUsage, - PromptTokensDetails, ) LANGCHAIN_VERSION = package_version("langchain") @@ -363,14 +361,6 @@ def test_langchain_openai_tools_agent( prompt_tokens=142, completion_tokens=50, total_tokens=192, - prompt_tokens_details=PromptTokensDetails( - audio_tokens=0, - cached_tokens=0, - ), - completion_tokens_details=CompletionTokensDetails( - audio_tokens=0, - reasoning_tokens=0, - ), ), ), ], @@ -430,14 +420,6 @@ def test_langchain_openai_tools_agent( prompt_tokens=89, completion_tokens=28, total_tokens=117, - prompt_tokens_details=PromptTokensDetails( - audio_tokens=0, - cached_tokens=0, - ), - completion_tokens_details=CompletionTokensDetails( - audio_tokens=0, - reasoning_tokens=0, - ), ), ), ], From a728bd0f07447483699232ac2152487cab39ff4f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 08:40:31 +0100 Subject: [PATCH 13/60] test(langchain): Add tool execution test --- tests/conftest.py | 59 +++++ .../integrations/langchain/test_langchain.py | 187 ++++++++++++++++ .../openai_agents/test_openai_agents.py | 209 +++++++----------- 3 files changed, 326 insertions(+), 129 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5dd62931f1..1aa5f04f6a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1099,6 +1099,65 @@ def nonstreaming_responses_model_response(): ) +@pytest.fixture +def responses_tool_call_model_responses(): + def inner( + tool_name: str, + arguments: str, + response_model: str, + response_text: str, + response_ids: "Iterator[str]", + usages: "Iterator[openai.types.responses.ResponseUsage]", + ): + yield openai.types.responses.Response( + id=next(response_ids), + output=[ + openai.types.responses.ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name=tool_name, + type="function_call", + arguments=arguments, + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model=response_model, + object="response", + usage=next(usages), + ) + + yield openai.types.responses.Response( + id=next(response_ids), + output=[ + openai.types.responses.ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + openai.types.responses.ResponseOutputText( + text=response_text, + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model=response_model, + object="response", + usage=next(usages), + ) + + return inner + + class MockServerRequestHandler(BaseHTTPRequestHandler): def do_GET(self): # noqa: N802 # Process an HTTP GET request and return a response. diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 99ee2b7851..86f8e6ad1a 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -54,6 +54,14 @@ CompletionUsage, ) +from openai.types.responses import ( + ResponseUsage, +) +from openai.types.responses.response_usage import ( + InputTokensDetails, + OutputTokensDetails, +) + LANGCHAIN_VERSION = package_version("langchain") @@ -209,6 +217,185 @@ def test_langchain_create_agent( assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) +@pytest.mark.skipif( + LANGCHAIN_VERSION < (1,), + reason="LangChain 1.0+ required (ONE AGENT refactor)", +) +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +def test_tool_execution_span( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + get_model_response, + responses_tool_call_model_responses, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + responses = responses_tool_call_model_responses( + tool_name="get_word_length", + arguments='{"word": "eudca"}', + response_model="gpt-4-0613", + response_text="The word eudca has 5 letters.", + response_ids=iter(["resp_1", "resp_2"]), + usages=iter( + [ + ResponseUsage( + input_tokens=142, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=50, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=192, + ), + ResponseUsage( + input_tokens=89, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=28, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=117, + ), + ] + ), + ) + tool_response = get_model_response( + next(responses), + serialize_pydantic=True, + request_headers={ + "X-Stainless-Raw-Response": "True", + }, + ) + final_response = get_model_response( + next(responses), + serialize_pydantic=True, + request_headers={ + "X-Stainless-Raw-Response": "True", + }, + ) + + llm = ChatOpenAI( + model_name="gpt-4", + temperature=0, + openai_api_key="badkey", + use_responses_api=True, + ) + agent = create_agent( + model=llm, + tools=[get_word_length], + name="word_length_agent", + ) + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + agent.invoke( + { + "messages": [ + HumanMessage(content="How many letters in the word eudca"), + ], + }, + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + + assert len(chat_spans) == 2 + + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + if send_default_pii and include_prompts: + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert "get_word_length" in tools_data + + @pytest.mark.parametrize( "send_default_pii, include_prompts", [ diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 1442a2001b..c428a822d4 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1062,7 +1062,11 @@ async def test_max_turns_before_handoff_span( @pytest.mark.asyncio async def test_tool_execution_span( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, + capture_events, + test_agent, + get_model_response, + responses_tool_call_model_responses, ): """ Test tool execution span creation. @@ -1078,75 +1082,45 @@ def simple_test_tool(message: str) -> str: model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent_with_tool = test_agent.clone(tools=[simple_test_tool], model=model) - tool_response = get_model_response( - Response( - id="resp_tool_123", - output=[ - ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="simple_test_tool", - type="function_call", - arguments='{"message": "hello"}', - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, + responses = responses_tool_call_model_responses( + tool_name="simple_test_tool", + arguments='{"message": "hello"}', + response_model="gpt-4", + response_text="Task completed using the tool", + response_ids=iter(["resp_tool_123", "resp_final_123"]), + usages=iter( + [ + ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=5, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=15, ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, + ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=25, ), - total_tokens=15, - ), + ] ), + ) + tool_response = get_model_response( + next(responses), serialize_pydantic=True, ) - final_response = get_model_response( - Response( - id="resp_final_123", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed using the tool", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=25, - ), - ), + next(responses), serialize_pydantic=True, ) @@ -2298,7 +2272,11 @@ def test_openai_agents_message_role_mapping( @pytest.mark.asyncio async def test_tool_execution_error_tracing( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, + capture_events, + test_agent, + get_model_response, + responses_tool_call_model_responses, ): """ Test that tool execution errors are properly tracked via error tracing patch. @@ -2321,75 +2299,45 @@ def failing_tool(message: str) -> str: model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent_with_tool = test_agent.clone(tools=[failing_tool], model=model) - tool_response = get_model_response( - Response( - id="resp_1", - output=[ - ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="failing_tool", - type="function_call", - arguments='{"message": "test"}', - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, + responses = responses_tool_call_model_responses( + tool_name="failing_tool", + arguments='{"message": "test"}', + response_model="gpt-4-0613", + response_text="An error occurred while running the tool", + response_ids=iter(["resp_1", "resp_2"]), + usages=iter( + [ + ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=5, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=15, ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, + ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=25, ), - total_tokens=15, - ), + ] ), + ) + tool_response = get_model_response( + next(responses), serialize_pydantic=True, ) - final_response = get_model_response( - Response( - id="resp_2", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="An error occurred while running the tool", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4-0613", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=25, - ), - ), + next(responses), serialize_pydantic=True, ) @@ -2420,7 +2368,10 @@ def failing_tool(message: str) -> str: # Find the execute_tool span execute_tool_span = None for span in spans: - if span.get("description", "").startswith("execute_tool failing_tool"): + description = span.get("description", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): execute_tool_span = span break From df300a9474571d27cb95f78f286ef6bef0c00430 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 08:42:35 +0100 Subject: [PATCH 14/60] test(langchain): Add text completion test --- .../integrations/langchain/test_langchain.py | 154 +++++++++--------- 1 file changed, 76 insertions(+), 78 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 86f8e6ad1a..149a4a690f 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -9,10 +9,10 @@ try: # Langchain >= 0.2 - from langchain_openai import ChatOpenAI + from langchain_openai import ChatOpenAI, OpenAI except ImportError: # Langchain < 0.2 - from langchain_community.chat_models import ChatOpenAI + from langchain_community.chat_models import ChatOpenAI, OpenAI from langchain_core.callbacks import BaseCallbackManager, CallbackManagerForLLMRun from langchain_core.messages import BaseMessage, AIMessageChunk @@ -50,6 +50,9 @@ ChoiceDeltaToolCallFunction, ) +from openai.types.completion import Completion +from openai.types.completion_choice import CompletionChoice + from openai.types.completion_usage import ( CompletionUsage, ) @@ -91,6 +94,77 @@ def _llm_type(self) -> str: return llm_type +def test_langchain_text_completion( + sentry_init, + capture_events, + get_model_response, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=True, + ) + ], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + model_repsonse = get_model_response( + Completion( + id="completion-id", + object="text_completion", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + CompletionChoice( + index=0, + finish_reason="stop", + text="The capital of France is Paris.", + ) + ], + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=15, + total_tokens=25, + ), + ), + serialize_pydantic=True, + ) + + model = OpenAI( + model_name="gpt-3.5-turbo", + temperature=0.7, + max_tokens=100, + openai_api_key="badkey", + ) + + with patch.object( + model.client._client._client, + "send", + return_value=model_repsonse, + ) as _: + with start_transaction(): + input_text = "What is the capital of France?" + model.invoke(input_text) + + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline" + ] + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + assert llm_span["description"] == "Langchain LLM call" + assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert llm_span["data"]["gen_ai.response.text"] == "The capital of France is Paris." + assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 + + @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", @@ -1026,82 +1100,6 @@ def test_langchain_callback_list_existing_callback(sentry_init): assert handler is sentry_callback -def test_langchain_integration_with_langchain_core_only(sentry_init, capture_events): - """Test that the langchain integration works when langchain.agents.AgentExecutor - is not available or langchain is not installed, but langchain-core is. - """ - - from langchain_core.outputs import LLMResult, Generation - - with patch("sentry_sdk.integrations.langchain.AgentExecutor", None): - from sentry_sdk.integrations.langchain import ( - LangchainIntegration, - SentryLangchainCallback, - ) - - sentry_init( - integrations=[LangchainIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - - try: - LangchainIntegration.setup_once() - except Exception as e: - pytest.fail(f"setup_once() failed when AgentExecutor is None: {e}") - - callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) - - run_id = "12345678-1234-1234-1234-123456789012" - serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} - prompts = ["What is the capital of France?"] - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The capital of France is Paris.")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - llm_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline" - ] - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - assert llm_span["description"] == "Langchain LLM call" - assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" - assert ( - llm_span["data"]["gen_ai.response.text"] - == "The capital of France is Paris." - ) - assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 - assert llm_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 - - def test_langchain_message_role_mapping(sentry_init, capture_events): """Test that message roles are properly normalized in langchain integration.""" global llm_type From b6e9f70e717aad54340fee887a76129589633cd8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 11:15:28 +0100 Subject: [PATCH 15/60] add agent invocation origin assertion --- tests/integrations/langchain/test_langchain.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index b6a098ea83..741fa28f69 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -216,11 +216,13 @@ def test_langchain_agent( assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" + invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") assert len(chat_spans) == 2 + assert invoke_agent_span["origin"] == "auto.ai.langchain" assert chat_spans[0]["origin"] == "auto.ai.langchain" assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" From 86f4a42ae5acc41713b70362ff20208154b078d1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 11:24:25 +0100 Subject: [PATCH 16/60] do not use mutable default values --- tests/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5dd62931f1..fe6f7c9a51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1039,7 +1039,10 @@ def inner(events, include_event_type=True): @pytest.fixture def get_model_response(): - def inner(response_content, serialize_pydantic=False, request_headers={}): + def inner(response_content, serialize_pydantic=False, request_headers=None): + if request_headers is None: + request_headers = {} + model_request = HttpxRequest( "POST", "/responses", From e6bfe4d21d1ce6b40fcc7a8496367d939d3f849f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 11:26:18 +0100 Subject: [PATCH 17/60] remove redundant assertion --- tests/integrations/langchain/test_langchain.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index b7016f226f..11a0fe3042 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -336,9 +336,6 @@ def test_tool_execution_span( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" - # We can't guarantee anything about the "shape" of the langchain execution graph - assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 - # Token usage is only available in newer versions of langchain (v0.2+) # where usage_metadata is supported on AIMessageChunk if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: From 8e2cec9e05e41b20ef619d0c57f602b76d2ecafe Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 11:28:21 +0100 Subject: [PATCH 18/60] fix legacy import --- tests/integrations/langchain/test_langchain.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 02293f2b7c..c67baf7c3f 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -12,7 +12,8 @@ from langchain_openai import ChatOpenAI, OpenAI except ImportError: # Langchain < 0.2 - from langchain_community.chat_models import ChatOpenAI, OpenAI + from langchain_community.llms import OpenAI + from langchain_community.chat_models import ChatOpenAI from langchain_core.callbacks import BaseCallbackManager, CallbackManagerForLLMRun from langchain_core.messages import BaseMessage, AIMessageChunk From 4dc9f041c145c479e6a3db85aaf6fea8e9fc3420 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 12:01:01 +0100 Subject: [PATCH 19/60] keep old is not None assertion --- tests/integrations/langchain/test_langchain.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 65e3073c5d..9d55d8f313 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -306,6 +306,9 @@ def test_langchain_agent( # Verify that available tools are always recorded regardless of PII settings for chat_span in chat_spans: tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) assert "get_word_length" in tools_data From 62f4f38d8d6aa18d378fa580fb8827aeaf2dd400 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 13:52:40 +0100 Subject: [PATCH 20/60] remove types with default values --- tests/integrations/langchain/test_langchain.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 5c2acf4141..7842f27f13 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -49,9 +49,7 @@ ) from openai.types.completion_usage import ( - CompletionTokensDetails, CompletionUsage, - PromptTokensDetails, ) @@ -233,14 +231,6 @@ def test_langchain_agent( prompt_tokens=142, completion_tokens=50, total_tokens=192, - prompt_tokens_details=PromptTokensDetails( - audio_tokens=0, - cached_tokens=0, - ), - completion_tokens_details=CompletionTokensDetails( - audio_tokens=0, - reasoning_tokens=0, - ), ), ), ], @@ -300,14 +290,6 @@ def test_langchain_agent( prompt_tokens=89, completion_tokens=28, total_tokens=117, - prompt_tokens_details=PromptTokensDetails( - audio_tokens=0, - cached_tokens=0, - ), - completion_tokens_details=CompletionTokensDetails( - audio_tokens=0, - reasoning_tokens=0, - ), ), ), ], From 787bf587c636e4061e6be8d01c68eee6a21289be Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 16:37:13 +0100 Subject: [PATCH 21/60] do not gate token assertions and typo --- tests/integrations/langchain/test_langchain.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 52be827dc0..5aec4f10b7 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -128,7 +128,7 @@ def test_langchain_create_agent( ) events = capture_events() - model_repsonse = get_model_response( + model_response = get_model_response( nonstreaming_responses_model_response, serialize_pydantic=True, request_headers={ @@ -152,7 +152,7 @@ def test_langchain_create_agent( with patch.object( llm.client._client._client, "send", - return_value=model_repsonse, + return_value=model_response, ) as _: with start_transaction(): agent.invoke( @@ -171,12 +171,9 @@ def test_langchain_create_agent( assert len(chat_spans) == 1 assert chat_spans[0]["origin"] == "auto.ai.langchain" - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 if send_default_pii and include_prompts: assert ( From 1b4b2baf31e6462a985dd2c1375bb705ddb24083 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 16:39:45 +0100 Subject: [PATCH 22/60] do not gate token assertions --- tests/integrations/langchain/test_langchain.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 122c45b224..45b7d28c27 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -333,17 +333,13 @@ def test_tool_execution_span( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 - if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: - assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 if send_default_pii and include_prompts: assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] From c7e871b030d4e76e71dc45652330240b01ca7796 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 18:19:42 +0100 Subject: [PATCH 23/60] assert that there is only one tool span --- tests/integrations/langchain/test_langchain.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 45b7d28c27..ed5cda83a6 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -325,10 +325,12 @@ def test_tool_execution_span( assert tx["contexts"]["trace"]["origin"] == "manual" chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") - tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") - assert len(chat_spans) == 2 + tool_exec_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + assert len(tool_exec_spans) == 1 + tool_exec_span = tool_exec_spans[0] + assert chat_spans[0]["origin"] == "auto.ai.langchain" assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" From 09673b74eec68453834faca52012df5039ebdce1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 23 Mar 2026 18:23:45 +0100 Subject: [PATCH 24/60] expect string response tool calls --- tests/integrations/langchain/test_langchain.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index ed5cda83a6..5a7032d552 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -353,13 +353,8 @@ def test_tool_execution_span( "Tool calls should be recorded when send_default_pii=True and include_prompts=True" ) tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - assert isinstance(tool_calls_data, (list, str)) # Could be serialized - if isinstance(tool_calls_data, str): - assert "get_word_length" in tool_calls_data - elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: - # Check if tool calls contain expected function name - tool_call_str = str(tool_calls_data) - assert "get_word_length" in tool_call_str + assert isinstance(tool_calls_data, str) + assert "get_word_length" in tool_calls_data else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) From 503b28ca75660ae978ef8ee7618841d62b6b0918 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 24 Mar 2026 16:36:22 +0100 Subject: [PATCH 25/60] . --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 9d389dc9a6..508b21760d 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -153,7 +153,7 @@ def test_langchain_text_completion( assert tx["type"] == "transaction" llm_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline" + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.generate_text" ] assert len(llm_spans) > 0 From 0631f88befdcffd039371eaad8269c0811e1d357 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 24 Mar 2026 16:39:27 +0100 Subject: [PATCH 26/60] . --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 508b21760d..4f6d38bb49 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -158,7 +158,7 @@ def test_langchain_text_completion( assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["description"] == "Langchain LLM call" + assert llm_span["description"] == "generate_text gpt-3.5-turbo" assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" assert llm_span["data"]["gen_ai.response.text"] == "The capital of France is Paris." assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 From 6f71424e6e513ba08980098757ed2cbbfb427aa2 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 24 Mar 2026 16:43:54 +0100 Subject: [PATCH 27/60] typo --- tests/integrations/langchain/test_langchain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 4f6d38bb49..46d831a3ea 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -111,7 +111,7 @@ def test_langchain_text_completion( ) events = capture_events() - model_repsonse = get_model_response( + model_response = get_model_response( Completion( id="completion-id", object="text_completion", @@ -143,7 +143,7 @@ def test_langchain_text_completion( with patch.object( model.client._client._client, "send", - return_value=model_repsonse, + return_value=model_response, ) as _: with start_transaction(): input_text = "What is the capital of France?" From d4c4cd4513c54e725f5917be32080cdc820dabf5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 25 Mar 2026 14:03:18 +0100 Subject: [PATCH 28/60] fix(langchain): Set agent name as gen_ai.agent.name --- sentry_sdk/integrations/langchain.py | 154 +++++++----------- .../integrations/langchain/test_langchain.py | 6 + 2 files changed, 66 insertions(+), 94 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index f4ec75310d..11b44b7096 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -1,4 +1,3 @@ -import contextvars import itertools import sys import json @@ -153,44 +152,6 @@ def _transform_langchain_message_content(content: "Any") -> "Any": return content -# Contextvar to track agent names in a stack for re-entrant agent support -_agent_stack: "contextvars.ContextVar[Optional[List[Optional[str]]]]" = ( - contextvars.ContextVar("langchain_agent_stack", default=None) -) - - -def _push_agent(agent_name: "Optional[str]") -> None: - """Push an agent name onto the stack.""" - stack = _agent_stack.get() - if stack is None: - stack = [] - else: - # Copy the list to maintain contextvar isolation across async contexts - stack = stack.copy() - stack.append(agent_name) - _agent_stack.set(stack) - - -def _pop_agent() -> "Optional[str]": - """Pop an agent name from the stack and return it.""" - stack = _agent_stack.get() - if stack: - # Copy the list to maintain contextvar isolation across async contexts - stack = stack.copy() - agent_name = stack.pop() - _agent_stack.set(stack) - return agent_name - return None - - -def _get_current_agent() -> "Optional[str]": - """Get the current agent name (top of stack) without removing it.""" - stack = _agent_stack.get() - if stack: - return stack[-1] - return None - - def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]": system_instructions = [] @@ -454,8 +415,8 @@ def on_chat_model_start( elif "openai" in ai_type: span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") - agent_name = _get_current_agent() - if agent_name: + agent_name = kwargs.get("metadata", {}).get("lc_agent_name") + if agent_name is not None: span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name) for key, attribute in DATA_FIELDS.items(): @@ -654,8 +615,8 @@ def on_tool_start( if tool_description is not None: span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool_description) - agent_name = _get_current_agent() - if agent_name: + agent_name = kwargs.get("metadata", {}).get("lc_agent_name") + if agent_name is not None: span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name) if should_send_default_pii() and self.include_prompts: @@ -782,9 +743,7 @@ def _record_token_usage(span: "Span", response: "Any") -> None: span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens) -def _get_request_data( - obj: "Any", args: "Any", kwargs: "Any" -) -> "tuple[Optional[str], Optional[List[Any]]]": +def _get_available_tools(obj: "Any") -> "tuple[Optional[str], Optional[List[Any]]]": """ Get the agent name and available tools for the agent. """ @@ -799,6 +758,13 @@ def _get_request_data( ) tools = tools if tools and len(tools) > 0 else None + return tools + + +def _get_run_name(obj: "Any", args: "Any"): + agent = getattr(obj, "agent", None) + runnable = getattr(agent, "runnable", None) + runnable_config = getattr(runnable, "config", {}) try: agent_name = None if len(args) > 1: @@ -808,7 +774,7 @@ def _get_request_data( except Exception: pass - return (agent_name, tools) + return agent_name def _simplify_langchain_tools(tools: "Any") -> "Optional[List[Any]]": @@ -976,58 +942,53 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": if integration is None: return f(self, *args, **kwargs) - agent_name, tools = _get_request_data(self, args, kwargs) start_span_function = get_start_span_function() - + run_name = _get_run_name(self, args) with start_span_function( op=OP.GEN_AI_INVOKE_AGENT, - name=f"invoke_agent {agent_name}" if agent_name else "invoke_agent", + name=run_name, origin=LangchainIntegration.origin, ) as span: - _push_agent(agent_name) - try: - if agent_name: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name) + if run_name: + span.set_data(SPANDATA.GEN_AI_AGENT_NAME, run_name) - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False) + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False) - _set_tools_on_span(span, tools) + tools = _get_available_tools(self) + _set_tools_on_span(span, tools) - # Run the agent - result = f(self, *args, **kwargs) + # Run the agent + result = f(self, *args, **kwargs) - input = result.get("input") - if ( - input is not None - and should_send_default_pii() - and integration.include_prompts - ): - normalized_messages = normalize_message_roles([input]) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + input = result.get("input") + if ( + input is not None + and should_send_default_pii() + and integration.include_prompts + ): + normalized_messages = normalize_message_roles([input]) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope + ) + if messages_data is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_MESSAGES, + messages_data, + unpack=False, ) - if messages_data is not None: - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - messages_data, - unpack=False, - ) - output = result.get("output") - if ( - output is not None - and should_send_default_pii() - and integration.include_prompts - ): - set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output) + output = result.get("output") + if ( + output is not None + and should_send_default_pii() + and integration.include_prompts + ): + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output) - return result - finally: - # Ensure agent is popped even if an exception occurs - _pop_agent() + return result return new_invoke @@ -1039,24 +1000,31 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": if integration is None: return f(self, *args, **kwargs) - agent_name, tools = _get_request_data(self, args, kwargs) start_span_function = get_start_span_function() + agent_name = kwargs.get("metadata", {}).get("lc_agent_name") + run_name = _get_run_name(self, args) + + span_name = "invoke_agent" + if agent_name is not None: + span_name = f"invoke_agent {agent_name}" + elif run_name: + span_name = f"invoke_agent {run_name}" + span = start_span_function( op=OP.GEN_AI_INVOKE_AGENT, - name=f"invoke_agent {agent_name}" if agent_name else "invoke_agent", + name=span_name, origin=LangchainIntegration.origin, ) span.__enter__() - _push_agent(agent_name) - - if agent_name: + if agent_name is not None: span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name) span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) + tools = _get_available_tools(self) _set_tools_on_span(span, tools) input = args[0].get("input") if len(args) >= 1 else None @@ -1106,7 +1074,6 @@ def new_iterator() -> "Iterator[Any]": raise finally: # Ensure cleanup happens even if iterator is abandoned or fails - _pop_agent() span.__exit__(*exc_info) async def new_iterator_async() -> "AsyncIterator[Any]": @@ -1132,7 +1099,6 @@ async def new_iterator_async() -> "AsyncIterator[Any]": raise finally: # Ensure cleanup happens even if iterator is abandoned or fails - _pop_agent() span.__exit__(*exc_info) if str(type(result)) == "": diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 46d831a3ea..7e5041c4be 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -254,6 +254,8 @@ def test_langchain_create_agent( assert len(chat_spans) == 1 assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[0]["data"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 @@ -410,6 +412,10 @@ def test_tool_execution_span( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["data"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[1]["data"]["gen_ai.agent.name"] == "word_length_agent" + assert tool_exec_span["data"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 From 8cf3f81a59a1458ff5695418e03cc66582a8a612 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 09:14:30 +0200 Subject: [PATCH 29/60] typing --- sentry_sdk/integrations/langchain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 6430477e5a..b685a838aa 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -754,7 +754,7 @@ def _record_token_usage(span: "Span", response: "Any") -> None: span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens) -def _get_available_tools(obj: "Any") -> "tuple[Optional[str], Optional[List[Any]]]": +def _get_available_tools(obj: "Any") -> "Optional[List[Any]]": """ Get the agent name and available tools for the agent. """ @@ -772,7 +772,7 @@ def _get_available_tools(obj: "Any") -> "tuple[Optional[str], Optional[List[Any] return tools -def _get_run_name(obj: "Any", args: "Any"): +def _get_run_name(obj: "Any", args: "Any") -> "Optional[str]": agent = getattr(obj, "agent", None) runnable = getattr(agent, "runnable", None) runnable_config = getattr(runnable, "config", {}) From 56ec48f644714f9a1f2645257985efe5880a2826 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 09:19:51 +0200 Subject: [PATCH 30/60] fix span description --- sentry_sdk/integrations/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index b685a838aa..c6e7346d7b 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -957,7 +957,7 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": run_name = _get_run_name(self, args) with start_span_function( op=OP.GEN_AI_INVOKE_AGENT, - name=run_name, + name=f"invoke_agent {run_name}" if run_name else "invoke_agent", origin=LangchainIntegration.origin, ) as span: if run_name: From d8c06f8cce3fffe728ccc38966c4901777e8f3a8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 09:29:08 +0200 Subject: [PATCH 31/60] defensive check --- sentry_sdk/integrations/langchain.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index c6e7346d7b..6279a29d6b 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -426,9 +426,11 @@ def on_chat_model_start( if ai_system: span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system) - agent_name = kwargs.get("metadata", {}).get("lc_agent_name") - if agent_name is not None: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name) + agent_metadata = kwargs.get("metadata") + if isinstance(agent_metadata, dict) and "lc_agent_name" in agent_metadata: + span.set_data( + SPANDATA.GEN_AI_AGENT_NAME, agent_metadata["lc_agent_name"] + ) for key, attribute in DATA_FIELDS.items(): if key in all_params and all_params[key] is not None: @@ -626,9 +628,11 @@ def on_tool_start( if tool_description is not None: span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool_description) - agent_name = kwargs.get("metadata", {}).get("lc_agent_name") - if agent_name is not None: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name) + agent_metadata = kwargs.get("metadata") + if isinstance(agent_metadata, dict) and "lc_agent_name" in agent_metadata: + span.set_data( + SPANDATA.GEN_AI_AGENT_NAME, agent_metadata["lc_agent_name"] + ) if should_send_default_pii() and self.include_prompts: set_data_normalized( From 36ca817269ff88a1f144402732e2be43cbde7fbd Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 09:34:06 +0200 Subject: [PATCH 32/60] no agent name in stream --- sentry_sdk/integrations/langchain.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 6279a29d6b..c786b22562 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -1017,25 +1017,15 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": start_span_function = get_start_span_function() - agent_name = kwargs.get("metadata", {}).get("lc_agent_name") run_name = _get_run_name(self, args) - span_name = "invoke_agent" - if agent_name is not None: - span_name = f"invoke_agent {agent_name}" - elif run_name: - span_name = f"invoke_agent {run_name}" - span = start_span_function( op=OP.GEN_AI_INVOKE_AGENT, - name=span_name, + name=f"invoke_agent {run_name}" if run_name else "invoke_agent", origin=LangchainIntegration.origin, ) span.__enter__() - if agent_name is not None: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name) - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) From 87ed0608417b7da872471a611996a04a327f5322 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 14:14:46 +0200 Subject: [PATCH 33/60] feat(langchain): Record run_name in on_chat_model_start --- sentry_sdk/integrations/langchain.py | 7 +++ .../integrations/langchain/test_langchain.py | 50 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index c786b22562..2d64c22325 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -432,6 +432,13 @@ def on_chat_model_start( SPANDATA.GEN_AI_AGENT_NAME, agent_metadata["lc_agent_name"] ) + run_name = kwargs.get("name") + if run_name is not None: + span.set_data( + SPANDATA.GEN_AI_PIPELINE_NAME, + run_name, + ) + for key, attribute in DATA_FIELDS.items(): if key in all_params and all_params[key] is not None: set_data_normalized(span, attribute, all_params[key], unpack=False) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 9243fcda53..0f8b5bed51 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -170,6 +170,56 @@ def test_langchain_text_completion( assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 +def test_langchain_chat( + sentry_init, + capture_events, + get_model_response, + nonstreaming_responses_model_response, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=True, + ) + ], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + model_response = get_model_response( + nonstreaming_responses_model_response, + serialize_pydantic=True, + request_headers={ + "X-Stainless-Raw-Response": "True", + }, + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + use_responses_api=True, + ) + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _: + with start_transaction(): + llm.invoke( + "How many letters in the word eudca", + config={"run_name": "my-snazzy-pipeline"}, + ) + + tx = events[0] + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert chat_spans[0]["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", From ee0b7d94228331c23d507a76d3aa5475a58a71b1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 14:25:10 +0200 Subject: [PATCH 34/60] feat(langchain): Record run_name in on_tool_start --- sentry_sdk/integrations/langchain.py | 7 +++++ .../integrations/langchain/test_langchain.py | 27 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 2d64c22325..3dec48c7f1 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -641,6 +641,13 @@ def on_tool_start( SPANDATA.GEN_AI_AGENT_NAME, agent_metadata["lc_agent_name"] ) + run_name = kwargs.get("name") + if run_name is not None: + span.set_data( + SPANDATA.GEN_AI_PIPELINE_NAME, + run_name, + ) + if should_send_default_pii() and self.include_prompts: set_data_normalized( span, diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 0f8b5bed51..029bfa7986 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -220,6 +220,33 @@ def test_langchain_chat( assert chat_spans[0]["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" +def test_langchain_tool( + sentry_init, + capture_events, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=True, + ) + ], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + with start_transaction(): + get_word_length.invoke( + {"word": "eudca"}, + config={"run_name": "my-snazzy-pipeline"}, + ) + + tx = events[0] + tool_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + assert len(tool_spans) == 1 + assert tool_spans[0]["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", From ea94bfcb1241ee3110fb657f182c4f194466c424 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 14:37:34 +0200 Subject: [PATCH 35/60] . --- tests/conftest.py | 24 ++++ .../integrations/langchain/test_langchain.py | 5 +- tests/integrations/openai/test_openai.py | 109 +++++++++++------- 3 files changed, 94 insertions(+), 44 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 71f2431aac..6a15d3668f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1102,6 +1102,30 @@ def nonstreaming_responses_model_response(): ) +@pytest.fixture +def nonstreaming_chat_completions_model_response(): + return openai.types.chat.ChatCompletion( + id="chat-id", + choices=[ + openai.types.chat.chat_completion.Choice( + index=0, + finish_reason="stop", + message=openai.types.chat.ChatCompletionMessage( + role="assistant", content="the model response" + ), + ) + ], + created=10000000, + model="response-model-id", + object="chat.completion", + usage=openai.types.CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), + ) + + @pytest.fixture def responses_tool_call_model_responses(): def inner( diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 0f8b5bed51..269b4052e2 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -174,7 +174,7 @@ def test_langchain_chat( sentry_init, capture_events, get_model_response, - nonstreaming_responses_model_response, + nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[ @@ -188,7 +188,7 @@ def test_langchain_chat( events = capture_events() model_response = get_model_response( - nonstreaming_responses_model_response, + nonstreaming_chat_completions_model_response, serialize_pydantic=True, request_headers={ "X-Stainless-Raw-Response": "True", @@ -199,7 +199,6 @@ def test_langchain_chat( model_name="gpt-3.5-turbo", temperature=0, openai_api_key="badkey", - use_responses_api=True, ) with patch.object( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 0fd049e742..cd60afe551 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -15,9 +15,8 @@ Omit = None from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError -from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding -from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk -from openai.types.chat.chat_completion import Choice +from openai.types import CreateEmbeddingResponse, Embedding +from openai.types.chat import ChatCompletionChunk from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage @@ -60,26 +59,6 @@ async def __call__(self, *args, **kwargs): OPENAI_VERSION = package_version("openai") -EXAMPLE_CHAT_COMPLETION = ChatCompletion( - id="chat-id", - choices=[ - Choice( - index=0, - finish_reason="stop", - message=ChatCompletionMessage( - role="assistant", content="the model response" - ), - ) - ], - created=10000000, - model="response-model-id", - object="chat.completion", - usage=CompletionUsage( - completion_tokens=10, - prompt_tokens=20, - total_tokens=30, - ), -) if SKIP_RESPONSES_TESTS: @@ -131,7 +110,11 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_chat_completion_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, + capture_events, + send_default_pii, + include_prompts, + nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], @@ -141,7 +124,9 @@ def test_nonstreaming_chat_completion_no_prompts( events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response + ) with start_transaction(name="openai tx"): response = ( @@ -228,7 +213,13 @@ def test_nonstreaming_chat_completion_no_prompts( ), ], ) -def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, request): +def test_nonstreaming_chat_completion( + sentry_init, + capture_events, + messages, + request, + nonstreaming_chat_completions_model_response, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -237,7 +228,9 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response + ) with start_transaction(name="openai tx"): response = ( @@ -307,7 +300,11 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ], ) async def test_nonstreaming_chat_completion_async_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, + capture_events, + send_default_pii, + include_prompts, + nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], @@ -317,7 +314,9 @@ async def test_nonstreaming_chat_completion_async_no_prompts( events = capture_events() client = AsyncOpenAI(api_key="z") - client.chat.completions._post = mock.AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.AsyncMock( + return_value=nonstreaming_chat_completions_model_response + ) with start_transaction(name="openai tx"): response = await client.chat.completions.create( @@ -403,7 +402,11 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ], ) async def test_nonstreaming_chat_completion_async( - sentry_init, capture_events, messages, request + sentry_init, + capture_events, + messages, + request, + nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -413,7 +416,9 @@ async def test_nonstreaming_chat_completion_async( events = capture_events() client = AsyncOpenAI(api_key="z") - client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = AsyncMock( + return_value=nonstreaming_chat_completions_model_response + ) with start_transaction(name="openai tx"): response = await client.chat.completions.create( @@ -1551,7 +1556,9 @@ async def test_embeddings_create_raises_error_async( assert event["level"] == "error" -def test_span_origin_nonstreaming_chat(sentry_init, capture_events): +def test_span_origin_nonstreaming_chat( + sentry_init, capture_events, nonstreaming_chat_completions_model_response +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, @@ -1559,7 +1566,9 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response + ) with start_transaction(name="openai tx"): client.chat.completions.create( @@ -1573,7 +1582,9 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): @pytest.mark.asyncio -async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): +async def test_span_origin_nonstreaming_chat_async( + sentry_init, capture_events, nonstreaming_chat_completions_model_response +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, @@ -1581,7 +1592,9 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): events = capture_events() client = AsyncOpenAI(api_key="z") - client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = AsyncMock( + return_value=nonstreaming_chat_completions_model_response + ) with start_transaction(name="openai tx"): await client.chat.completions.create( @@ -3125,7 +3138,9 @@ async def test_streaming_responses_api_async( "tools", [[], None, NOT_GIVEN, omit], ) -def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): +def test_empty_tools_in_chat_completion( + sentry_init, capture_events, tools, nonstreaming_chat_completions_model_response +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, @@ -3133,7 +3148,9 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response + ) with start_transaction(name="openai tx"): client.chat.completions.create( @@ -3164,7 +3181,11 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): ], ) def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, + capture_events, + test_message, + expected_role, + nonstreaming_chat_completions_model_response, ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3176,7 +3197,9 @@ def test_openai_message_role_mapping( events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response + ) test_messages = [test_message] @@ -3197,7 +3220,9 @@ def test_openai_message_role_mapping( assert stored_messages[0]["role"] == expected_role -def test_openai_message_truncation(sentry_init, capture_events): +def test_openai_message_truncation( + sentry_init, capture_events, nonstreaming_chat_completions_model_response +): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -3207,7 +3232,9 @@ def test_openai_message_truncation(sentry_init, capture_events): events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response + ) large_content = ( "This is a very long message that will exceed our size limits. " * 1000 From cd08d96d0cfd7c9c900808ec55649a8eeca13225 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 15:06:21 +0200 Subject: [PATCH 36/60] . --- tests/integrations/langchain/test_langchain.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 269b4052e2..e514b30958 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -67,6 +67,7 @@ ) LANGCHAIN_VERSION = package_version("langchain") +LANGCHAIN_OPENAI_VERSION = package_version("langchain-openai") @tool @@ -187,12 +188,15 @@ def test_langchain_chat( ) events = capture_events() + request_headers = {} + # Changed in https://github.com/langchain-ai/langchain/pull/32655 + if LANGCHAIN_OPENAI_VERSION >= (0, 3, 32): + request_headers["X-Stainless-Raw-Response"] = "True" + model_response = get_model_response( nonstreaming_chat_completions_model_response, serialize_pydantic=True, - request_headers={ - "X-Stainless-Raw-Response": "True", - }, + request_headers=request_headers, ) llm = ChatOpenAI( From 0d43616ca8e4ff4888d52cbc8d2fddbe199d41c9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 15:18:12 +0200 Subject: [PATCH 37/60] simplify --- sentry_sdk/integrations/langchain.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index c786b22562..87c01ff326 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -758,7 +758,9 @@ def _record_token_usage(span: "Span", response: "Any") -> None: span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens) -def _get_available_tools(obj: "Any") -> "Optional[List[Any]]": +def _get_request_data( + obj: "Any", args: "Any", kwargs: "Any" +) -> "tuple[Optional[str], Optional[List[Any]]]": """ Get the agent name and available tools for the agent. """ @@ -773,13 +775,6 @@ def _get_available_tools(obj: "Any") -> "Optional[List[Any]]": ) tools = tools if tools and len(tools) > 0 else None - return tools - - -def _get_run_name(obj: "Any", args: "Any") -> "Optional[str]": - agent = getattr(obj, "agent", None) - runnable = getattr(agent, "runnable", None) - runnable_config = getattr(runnable, "config", {}) try: agent_name = None if len(args) > 1: @@ -789,7 +784,7 @@ def _get_run_name(obj: "Any", args: "Any") -> "Optional[str]": except Exception: pass - return agent_name + return (agent_name, tools) def _simplify_langchain_tools(tools: "Any") -> "Optional[List[Any]]": @@ -957,8 +952,9 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": if integration is None: return f(self, *args, **kwargs) + run_name, tools = _get_request_data(self, args, kwargs) start_span_function = get_start_span_function() - run_name = _get_run_name(self, args) + with start_span_function( op=OP.GEN_AI_INVOKE_AGENT, name=f"invoke_agent {run_name}" if run_name else "invoke_agent", @@ -970,7 +966,6 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False) - tools = _get_available_tools(self) _set_tools_on_span(span, tools) # Run the agent @@ -1015,10 +1010,9 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": if integration is None: return f(self, *args, **kwargs) + run_name, tools = _get_request_data(self, args, kwargs) start_span_function = get_start_span_function() - run_name = _get_run_name(self, args) - span = start_span_function( op=OP.GEN_AI_INVOKE_AGENT, name=f"invoke_agent {run_name}" if run_name else "invoke_agent", @@ -1029,7 +1023,6 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) - tools = _get_available_tools(self) _set_tools_on_span(span, tools) input = args[0].get("input") if len(args) >= 1 else None From 568e6f7eb4858423793369536b53ec8310ee4a69 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 15:24:59 +0200 Subject: [PATCH 38/60] truthy check --- sentry_sdk/integrations/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 2d64c22325..8115b660b1 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -433,7 +433,7 @@ def on_chat_model_start( ) run_name = kwargs.get("name") - if run_name is not None: + if run_name: span.set_data( SPANDATA.GEN_AI_PIPELINE_NAME, run_name, From 471260ede3f805c1c66b3d38ba42c890bbfb91de Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 31 Mar 2026 15:26:00 +0200 Subject: [PATCH 39/60] truthy check --- sentry_sdk/integrations/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 3dec48c7f1..54c90172fb 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -642,7 +642,7 @@ def on_tool_start( ) run_name = kwargs.get("name") - if run_name is not None: + if run_name: span.set_data( SPANDATA.GEN_AI_PIPELINE_NAME, run_name, From b9387b8d4aae379cbf665cdacf329fdad9bb1e88 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 08:52:22 +0200 Subject: [PATCH 40/60] set run name --- sentry_sdk/integrations/langchain.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 87c01ff326..7a9d02f521 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -1020,6 +1020,9 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": ) span.__enter__() + if run_name: + span.set_data(SPANDATA.GEN_AI_AGENT_NAME, run_name) + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) From dd79d6f27590559c3e7e71352ab9a8616e8be051 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 09:05:45 +0200 Subject: [PATCH 41/60] feat(langchain): Record run_name as gen_ai.pipeline.name on Invoke Agent Spans --- sentry_sdk/integrations/langchain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 21447a6655..5b1540560d 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -975,7 +975,7 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": origin=LangchainIntegration.origin, ) as span: if run_name: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, run_name) + span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, run_name) span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False) @@ -1035,7 +1035,7 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": span.__enter__() if run_name: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, run_name) + span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, run_name) span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) From 77af6f29d0f345d07616613e6a37e69e188b9742 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 10:01:38 +0200 Subject: [PATCH 42/60] add tests --- .../integrations/langchain/test_langchain.py | 377 ++++++++++-------- 1 file changed, 215 insertions(+), 162 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index e7edd645f2..b2a2e72f90 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -43,14 +43,6 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.messages import HumanMessage, SystemMessage -from openai.types.chat.chat_completion_chunk import ( - ChatCompletionChunk, - Choice, - ChoiceDelta, - ChoiceDeltaToolCall, - ChoiceDeltaToolCallFunction, -) - from openai.types.completion import Completion from openai.types.completion_choice import CompletionChoice @@ -581,6 +573,7 @@ def test_langchain_openai_tools_agent( request, get_model_response, server_side_event_chunks, + streaming_chat_completions_model_responses, ): sentry_init( integrations=[ @@ -604,167 +597,18 @@ def test_langchain_openai_tools_agent( ] ) + model_responses = streaming_chat_completions_model_responses() + tool_response = get_model_response( server_side_event_chunks( - [ - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(role="assistant"), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta( - tool_calls=[ - ChoiceDeltaToolCall( - index=0, - id="call_BbeyNhCKa6kYLYzrD40NGm3b", - type="function", - function=ChoiceDeltaToolCallFunction( - name="get_word_length", - arguments="", - ), - ), - ], - ), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta( - tool_calls=[ - ChoiceDeltaToolCall( - index=0, - function=ChoiceDeltaToolCallFunction( - arguments='{"word": "eudca"}', - ), - ), - ], - ), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(content="5"), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(), - finish_reason="function_call", - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[], - usage=CompletionUsage( - prompt_tokens=142, - completion_tokens=50, - total_tokens=192, - ), - ), - ], + next(model_responses), include_event_type=False, ) ) final_response = get_model_response( server_side_event_chunks( - [ - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(role="assistant"), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(content="The word eudca has 5 letters."), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(), - finish_reason="stop", - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[], - usage=CompletionUsage( - prompt_tokens=89, - completion_tokens=28, - total_tokens=117, - ), - ), - ], + next(model_responses), include_event_type=False, ) ) @@ -784,7 +628,12 @@ def test_langchain_openai_tools_agent( side_effect=[tool_response, final_response], ) as _: with start_transaction(): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) tx = events[0] assert tx["type"] == "transaction" @@ -801,6 +650,210 @@ def test_langchain_openai_tools_agent( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + if send_default_pii and include_prompts: + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + ["You are a helpful assistant.", "Be concise and clear."], + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "list", "blocks"], +) +def test_langchain_openai_tools_agent_stream( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + server_side_event_chunks, + streaming_chat_completions_model_responses, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + system_instructions_content, + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + model_responses = streaming_chat_completions_model_responses() + + tool_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + final_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 From d02f1cad11b3f3751c7e93dc2d677152cb4f1edf Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 10:17:14 +0200 Subject: [PATCH 43/60] add conftest --- tests/conftest.py | 169 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 6a15d3668f..00dafe6ce2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1126,6 +1126,175 @@ def nonstreaming_chat_completions_model_response(): ) +@pytest.fixture +def streaming_chat_completions_model_responses(): + def inner(): + yield [ + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + role="assistant" + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + tool_calls=[ + openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCall( + index=0, + id="call_BbeyNhCKa6kYLYzrD40NGm3b", + type="function", + function=openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCallFunction( + name="get_word_length", + arguments="", + ), + ), + ], + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + tool_calls=[ + openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCall( + index=0, + function=openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCallFunction( + arguments='{"word": "eudca"}', + ), + ), + ], + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + content="5" + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(), + finish_reason="function_call", + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[], + usage=openai.types.chat.chat_completion_chunk.CompletionUsage( + prompt_tokens=142, + completion_tokens=50, + total_tokens=192, + ), + ), + ] + + yield [ + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + role="assistant" + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + content="The word eudca has 5 letters." + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(), + finish_reason="stop", + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[], + usage=openai.types.chat.chat_completion_chunk.CompletionUsage( + prompt_tokens=89, + completion_tokens=28, + total_tokens=117, + ), + ), + ] + + return inner + + @pytest.fixture def responses_tool_call_model_responses(): def inner( From bf72fb2ae5248cb070427bf417c3e08f6b2ed70b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 10:35:16 +0200 Subject: [PATCH 44/60] add tests --- .../integrations/langchain/test_langchain.py | 406 ++++++++++++++++++ 1 file changed, 406 insertions(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index b2a2e72f90..31d4789891 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -745,6 +745,209 @@ def test_langchain_openai_tools_agent( assert "get_word_length" in tools_data +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + ["You are a helpful assistant.", "Be concise and clear."], + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "list", "blocks"], +) +def test_langchain_openai_tools_agent_with_config( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + server_side_event_chunks, + streaming_chat_completions_model_responses, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + system_instructions_content, + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + model_responses = streaming_chat_completions_model_responses() + + tool_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + final_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt).with_config( + {"run_name": "my-snazzy-pipeline"} + ) + + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + if send_default_pii and include_prompts: + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -947,6 +1150,209 @@ def test_langchain_openai_tools_agent_stream( assert "get_word_length" in tools_data +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + ["You are a helpful assistant.", "Be concise and clear."], + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "list", "blocks"], +) +def test_langchain_openai_tools_agent_stream_with_config( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + server_side_event_chunks, + streaming_chat_completions_model_responses, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + system_instructions_content, + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + model_responses = streaming_chat_completions_model_responses() + + tool_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + final_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt).with_config( + {"run_name": "my-snazzy-pipeline"} + ) + + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + if send_default_pii and include_prompts: + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + def test_langchain_error(sentry_init, capture_events): global llm_type llm_type = "acme-llm" From 848797a18da1d52b4359d7f2d5694c5348d221b5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 11:21:53 +0200 Subject: [PATCH 45/60] update test name --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index e7edd645f2..a711ef0c7b 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -223,7 +223,7 @@ def test_langchain_chat( assert chat_spans[0]["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" -def test_langchain_tool( +def test_langchain_tool_call_with_run_name( sentry_init, capture_events, ): From 52eb5c358d8e318920d3ccdd60ceea69e8881505 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 11:28:44 +0200 Subject: [PATCH 46/60] more descriptive test name --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index e514b30958..bd9ab894c4 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -171,7 +171,7 @@ def test_langchain_text_completion( assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 -def test_langchain_chat( +def test_langchain_chat_with_run_name( sentry_init, capture_events, get_model_response, From 5bddf72f048dfe3f124e70e0e07ce386aab26be8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 10:51:44 +0200 Subject: [PATCH 47/60] import order --- tests/integrations/openai/test_openai.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 7fb9708e9f..4a5215b7ae 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -15,8 +15,9 @@ Omit = None from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError -from openai.types import CreateEmbeddingResponse, Embedding -from openai.types.chat import ChatCompletionChunk +from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding +from openai.types.chat import ChatCompletionMessage, ChatCompletionChunk +from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage From 1efa74834bfae1fcf13e39e2cd00f89fc64c94b5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 10:52:23 +0200 Subject: [PATCH 48/60] remove duplicate imports --- tests/integrations/openai/test_openai.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4a5215b7ae..f2bb8912a4 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -24,9 +24,6 @@ SKIP_RESPONSES_TESTS = False try: - from openai.types.chat.chat_completion import Choice - from openai.types.chat import ChatCompletionMessage - from openai.types.completion_usage import CompletionUsage from openai.types.responses.response_completed_event import ResponseCompletedEvent from openai.types.responses.response_created_event import ResponseCreatedEvent from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent From df5a72f4bce420c3ff9028a83698692968f4fc56 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 10:55:20 +0200 Subject: [PATCH 49/60] merge and function_id --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index d885883ac2..431cba023f 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -252,7 +252,7 @@ def test_langchain_tool_call_with_run_name( tx = events[0] tool_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") assert len(tool_spans) == 1 - assert tool_spans[0]["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert tool_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" @pytest.mark.skipif( From ec4ee52493e45b3ad1ee5b8025c58a847e432f7e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 10:55:42 +0200 Subject: [PATCH 50/60] set function id --- sentry_sdk/integrations/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 973be3e520..bffa174f22 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -644,7 +644,7 @@ def on_tool_start( run_name = kwargs.get("name") if run_name: span.set_data( - SPANDATA.GEN_AI_PIPELINE_NAME, + SPANDATA.GEN_AI_FUNCTION_ID, run_name, ) From a566ced637428af15ebe4c30816313e2e0cd2c10 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 10:56:49 +0200 Subject: [PATCH 51/60] update assertion --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 92a2607a2f..aed451e791 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -225,7 +225,7 @@ def test_langchain_chat_with_run_name( chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") assert len(chat_spans) == 1 - assert chat_spans[0]["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert chat_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" @pytest.mark.skipif( From 45b3dae5f4ba466fbd434b1f00564611cc25103f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 11:01:15 +0200 Subject: [PATCH 52/60] change to function_id --- sentry_sdk/integrations/langchain.py | 4 ++-- tests/integrations/langchain/test_langchain.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 0b64e999a2..9a7e3c42e8 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -975,7 +975,7 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": origin=LangchainIntegration.origin, ) as span: if run_name: - span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, run_name) + span.set_data(SPANDATA.GEN_AI_FUNCTION_ID, run_name) span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False) @@ -1035,7 +1035,7 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": span.__enter__() if run_name: - span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, run_name) + span.set_data(SPANDATA.GEN_AI_FUNCTION_ID, run_name) span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index e3477a5542..e9cbe07d36 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -655,7 +655,7 @@ def test_langchain_openai_tools_agent( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" - assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 @@ -858,7 +858,7 @@ def test_langchain_openai_tools_agent_with_config( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" - assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 @@ -1060,7 +1060,7 @@ def test_langchain_openai_tools_agent_stream( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" - assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 @@ -1263,7 +1263,7 @@ def test_langchain_openai_tools_agent_stream_with_config( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" - assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 From fb388a968b3509c048e9dffd5e7cf726cbd24509 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 11:07:17 +0200 Subject: [PATCH 53/60] update kwarg --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index aed451e791..0a7a920624 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -197,7 +197,7 @@ def test_langchain_chat_with_run_name( nonstreaming_chat_completions_model_response( response_id="chat-id", response_model="response-model-id", - response_content="the model response", + message_content="the model response", created=10000000, ), serialize_pydantic=True, From efc9460077728974c1a0c3c48eed57746942431f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 11:12:44 +0200 Subject: [PATCH 54/60] make openai test values consistent with previous values --- tests/integrations/openai/test_openai.py | 54 ++++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index f2bb8912a4..c1f07e83eb 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -128,10 +128,10 @@ def test_nonstreaming_chat_completion_no_prompts( client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -237,10 +237,10 @@ def test_nonstreaming_chat_completion( client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -328,10 +328,10 @@ async def test_nonstreaming_chat_completion_async_no_prompts( client = AsyncOpenAI(api_key="z") client.chat.completions._post = mock.AsyncMock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -435,10 +435,10 @@ async def test_nonstreaming_chat_completion_async( client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -1888,10 +1888,10 @@ def test_span_origin_nonstreaming_chat( client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -1919,10 +1919,10 @@ async def test_span_origin_nonstreaming_chat_async( client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -3686,10 +3686,10 @@ def test_empty_tools_in_chat_completion( client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -3740,10 +3740,10 @@ def test_openai_message_role_mapping( client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) @@ -3780,10 +3780,10 @@ def test_openai_message_truncation( client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( return_value=nonstreaming_chat_completions_model_response( - response_id="chatcmpl-test", + response_id="chat-id", response_model="gpt-3.5-turbo", - message_content="Test response", - created=1234567890, + message_content="the model response", + created=10000000, ) ) From 1de30a1bf6f2314fcd4d16e398a2647adae5129a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 11:25:06 +0200 Subject: [PATCH 55/60] update fixture arguments --- .../integrations/langchain/test_langchain.py | 5 +++ tests/integrations/openai/test_openai.py | 45 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 0a7a920624..437bcb17a3 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -199,6 +199,11 @@ def test_langchain_chat_with_run_name( response_model="response-model-id", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers=request_headers, diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c1f07e83eb..20bbf2adf5 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -132,6 +132,11 @@ def test_nonstreaming_chat_completion_no_prompts( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -241,6 +246,11 @@ def test_nonstreaming_chat_completion( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -332,6 +342,11 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -439,6 +454,11 @@ async def test_nonstreaming_chat_completion_async( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -1892,6 +1912,11 @@ def test_span_origin_nonstreaming_chat( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -1923,6 +1948,11 @@ async def test_span_origin_nonstreaming_chat_async( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -3690,6 +3720,11 @@ def test_empty_tools_in_chat_completion( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -3744,6 +3779,11 @@ def test_openai_message_role_mapping( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) @@ -3784,6 +3824,11 @@ def test_openai_message_truncation( response_model="gpt-3.5-turbo", message_content="the model response", created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), ) ) From 42dbc3a147305ac9bc0b392d2cd1fea85cecd071 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 11:25:39 +0200 Subject: [PATCH 56/60] . --- tests/conftest.py | 12 ++-- tests/integrations/litellm/test_litellm.py | 76 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5d014d2411..ba28e4991c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1266,7 +1266,11 @@ def streaming_chat_completions_model_response(): @pytest.fixture def nonstreaming_chat_completions_model_response(): def inner( - response_id: str, response_model: str, message_content: str, created: int + response_id: str, + response_model: str, + message_content: str, + created: int, + usage: openai.types.CompletionUsage, ): return openai.types.chat.ChatCompletion( id=response_id, @@ -1282,11 +1286,7 @@ def inner( created=created, model=response_model, object="chat.completion", - usage=openai.types.CompletionUsage( - prompt_tokens=10, - completion_tokens=20, - total_tokens=30, - ), + usage=usage, ) return inner diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b1a3b6af89..18f8cfaf6e 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -33,6 +33,7 @@ async def __call__(self, *args, **kwargs): from sentry_sdk.utils import package_version from openai import OpenAI, AsyncOpenAI +from openai.types import CompletionUsage from concurrent.futures import ThreadPoolExecutor @@ -165,6 +166,11 @@ def test_nonstreaming_chat_completion( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -252,6 +258,11 @@ async def test_async_nonstreaming_chat_completion( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -919,6 +930,11 @@ def test_span_origin( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -969,6 +985,11 @@ def test_multiple_providers( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1068,6 +1089,11 @@ async def test_async_multiple_providers( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1168,6 +1194,11 @@ def test_additional_parameters( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1231,6 +1262,11 @@ async def test_async_additional_parameters( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1294,6 +1330,11 @@ def test_no_integration( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1346,6 +1387,11 @@ async def test_async_no_integration( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1528,6 +1574,11 @@ def test_binary_content_encoding_image_url( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1611,6 +1662,11 @@ async def test_async_binary_content_encoding_image_url( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1696,6 +1752,11 @@ def test_binary_content_encoding_mixed_content( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1768,6 +1829,11 @@ async def test_async_binary_content_encoding_mixed_content( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1839,6 +1905,11 @@ def test_binary_content_encoding_uri_type( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, @@ -1916,6 +1987,11 @@ async def test_async_binary_content_encoding_uri_type( response_model="gpt-3.5-turbo", message_content="Test response", created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, From 94d848c02978f619993fca44d5f2f0e5a17264cb Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 11:34:23 +0200 Subject: [PATCH 57/60] fix(langchain): Record run_name as gen_ai.function_id for text completions --- sentry_sdk/integrations/langchain.py | 2 +- tests/integrations/langchain/test_langchain.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 9a7e3c42e8..1be9cf006a 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -348,7 +348,7 @@ def on_llm_start( pipeline_name = kwargs.get("name") if pipeline_name: - span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, pipeline_name) + span.set_data(SPANDATA.GEN_AI_FUNCTION_ID, pipeline_name) if model: span.set_data( diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index f1cff8f58d..3a23dd6c77 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -155,7 +155,7 @@ def test_langchain_text_completion( llm_span = llm_spans[0] assert llm_span["description"] == "text_completion gpt-3.5-turbo" assert llm_span["data"]["gen_ai.system"] == "openai" - assert llm_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert llm_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" assert llm_span["data"]["gen_ai.response.text"] == "The capital of France is Paris." assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 @@ -1859,7 +1859,7 @@ def test_langchain_message_truncation(sentry_init, capture_events): llm_span = llm_spans[0] assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "my_pipeline" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] From fd7693b2651a68dc7ab1e2247bac5e4f7fe1d35b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 14 Apr 2026 11:35:44 +0200 Subject: [PATCH 58/60] update variable name --- sentry_sdk/integrations/langchain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 1be9cf006a..8acf215bfe 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -346,9 +346,9 @@ def on_llm_start( span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "text_completion") - pipeline_name = kwargs.get("name") - if pipeline_name: - span.set_data(SPANDATA.GEN_AI_FUNCTION_ID, pipeline_name) + run_name = kwargs.get("name") + if run_name: + span.set_data(SPANDATA.GEN_AI_FUNCTION_ID, run_name) if model: span.set_data( From 540a596222feb6ad8f4c28e8a9dce0bc2d51f44b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 22 Apr 2026 17:11:45 +0200 Subject: [PATCH 59/60] remove conftest fixture --- tests/conftest.py | 169 ---------------------------------------------- 1 file changed, 169 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b9a3b6ef55..ba28e4991c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1398,175 +1398,6 @@ def nonstreaming_google_genai_model_response(): ) -@pytest.fixture -def streaming_chat_completions_model_responses(): - def inner(): - yield [ - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( - role="assistant" - ), - finish_reason=None, - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( - tool_calls=[ - openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCall( - index=0, - id="call_BbeyNhCKa6kYLYzrD40NGm3b", - type="function", - function=openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCallFunction( - name="get_word_length", - arguments="", - ), - ), - ], - ), - finish_reason=None, - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( - tool_calls=[ - openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCall( - index=0, - function=openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCallFunction( - arguments='{"word": "eudca"}', - ), - ), - ], - ), - finish_reason=None, - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( - content="5" - ), - finish_reason=None, - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(), - finish_reason="function_call", - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[], - usage=openai.types.chat.chat_completion_chunk.CompletionUsage( - prompt_tokens=142, - completion_tokens=50, - total_tokens=192, - ), - ), - ] - - yield [ - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( - role="assistant" - ), - finish_reason=None, - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( - content="The word eudca has 5 letters." - ), - finish_reason=None, - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - openai.types.chat.chat_completion_chunk.Choice( - index=0, - delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(), - finish_reason="stop", - ), - ], - ), - openai.types.chat.chat_completion_chunk.ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[], - usage=openai.types.chat.chat_completion_chunk.CompletionUsage( - prompt_tokens=89, - completion_tokens=28, - total_tokens=117, - ), - ), - ] - - return inner - - @pytest.fixture def responses_tool_call_model_responses(): def inner( From 222046ce92f8f77f57af25f29a0340ee9c612a82 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 22 Apr 2026 17:16:37 +0200 Subject: [PATCH 60/60] test fix --- tests/integrations/langchain/test_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 33179eb944..240a78e2cc 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1983,7 +1983,7 @@ def test_langchain_message_truncation(sentry_init, capture_events): llm_span = llm_spans[0] assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "my_pipeline" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]