Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,16 +534,22 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An
if "format" in document:
result["format"] = document["format"]

# Handle source - supports bytes or location
# Handle source - supports bytes, content, location, or text
if "source" in document:
source = document["source"]
formatted_document_source: dict[str, Any] | None
if "location" in source:
formatted_document_source = self._handle_location(source["location"])
document_source = document["source"]
formatted_document_source: dict[str, Any] | None = None
if "location" in document_source:
formatted_document_source = self._handle_location(document_source["location"])
if formatted_document_source is None:
return None
elif "bytes" in source:
formatted_document_source = {"bytes": source["bytes"]}
elif "bytes" in document_source:
formatted_document_source = {"bytes": document_source["bytes"]}
elif "text" in document_source:
formatted_document_source = {"text": document_source["text"]}
elif "content" in document_source:
formatted_document_source = {
"content": [{"text": item["text"]} for item in document_source["content"] if "text" in item]
}
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When document["source"] is present but does not contain any of the recognized keys (location, bytes, text, content), formatted_document_source remains None and the request is sent with "source": None. This will violate Bedrock’s schema (source should be an object) and can lead to a runtime validation error. Consider either (a) returning None to drop the whole document block (consistent with the unsupported-location path), or (b) only setting result["source"] when formatted_document_source is not None and otherwise raising a TypeError/logging + skipping.

Suggested change
}
}
if formatted_document_source is None:
return None

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good point to raise, the source field is always required in a DocumentSource (per docs), and when source is passed as None a botocore exception is raised:

botocore.exceptions.ParamValidationError: Parameter validation failed:
Missing required parameter in messages[0].content[1].document: "source"

This is the current behavior which I was not modifying, so based on any review provided, I can follow the location handling of dropping, raising an exception in Strands, or allowing botocore to handle it. I personally think leaving the current implementation is best, allowing botocore/aws to bubble up the correct exception, not dealing with that in Strands itself.

Comment on lines +550 to +552
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For DocumentSource.content, this always emits a content array even if every item is filtered out by the if "text" in item guard, resulting in {"content": []}. If Bedrock rejects empty content (similar filtering elsewhere in this file omits empty arrays), this can cause request validation failures. Consider omitting the content key when the filtered list is empty, or returning None to skip the document block when no valid content blocks remain.

Suggested change
formatted_document_source = {
"content": [{"text": item["text"]} for item in document_source["content"] if "text" in item]
}
formatted_content = [{"text": item["text"]} for item in document_source["content"] if "text" in item]
if not formatted_content:
return None
formatted_document_source = {"content": formatted_content}

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When an empty content list is passed in, botocore throws an exception:

botocore.errorfactory.ValidationException: An error occurred (ValidationException) when calling the ConverseStream operation: The model returned the following errors: messages.0.content.1.document.source.content.content: Field required

I see value in not passing content (returning None) when there is actually no content.

Not sure what the use case of a user providing an empty source document is, so it can be filtered out. I think if this path is chosen then the same approach should be used for the document source being None (dropping, since there is no content). I don't think a user would want this to error out, but also would be good to know that malformed documents are being provided and not being silently hidden.

result["source"] = formatted_document_source

# Handle optional fields
Expand All @@ -564,14 +570,14 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An
# https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ImageBlock.html
if "image" in content:
image = content["image"]
source = image["source"]
image_source = image["source"]
formatted_image_source: dict[str, Any] | None
if "location" in source:
formatted_image_source = self._handle_location(source["location"])
if "location" in image_source:
formatted_image_source = self._handle_location(image_source["location"])
if formatted_image_source is None:
return None
elif "bytes" in source:
formatted_image_source = {"bytes": source["bytes"]}
elif "bytes" in image_source:
formatted_image_source = {"bytes": image_source["bytes"]}
result = {"format": image["format"], "source": formatted_image_source}
return {"image": result}
Comment on lines +573 to 582
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatted_image_source is only assigned in the location/bytes branches. If an image block includes a source without either key (e.g., empty dict), this will raise UnboundLocalError when building result. Initialize formatted_image_source to None and either return None (skip block) or raise a TypeError when neither supported key is present.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This leaves current behavior to keep change set smaller, this area was only updated to address mypy linter concerns. If needed, I can update this area, but will increase the logical scope of the changes.


Expand Down Expand Up @@ -636,14 +642,14 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An
# https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_VideoBlock.html
if "video" in content:
video = content["video"]
source = video["source"]
video_source = video["source"]
formatted_video_source: dict[str, Any] | None
if "location" in source:
formatted_video_source = self._handle_location(source["location"])
if "location" in video_source:
formatted_video_source = self._handle_location(video_source["location"])
if formatted_video_source is None:
return None
elif "bytes" in source:
formatted_video_source = {"bytes": source["bytes"]}
elif "bytes" in video_source:
formatted_video_source = {"bytes": video_source["bytes"]}
result = {"format": video["format"], "source": formatted_video_source}
return {"video": result}
Comment on lines +645 to 654
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatted_video_source is only assigned in the location/bytes branches. If a video block includes a source without either key, this will raise UnboundLocalError when building result. Initialize formatted_video_source to None and either return None (skip block) or raise a TypeError when neither supported key is present.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This leaves current behavior to keep change set smaller, this area was only updated to address mypy linter concerns. If needed, I can update this area, but will increase the logical scope of the changes.


Expand Down
16 changes: 15 additions & 1 deletion src/strands/types/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,32 @@ class S3Location(Location, total=False):
SourceLocation: TypeAlias = Location | S3Location


class DocumentBlockContent(TypedDict, total=False):
"""An inline content block within a document source.

Attributes:
text: The text content of the block.
"""

text: str


class DocumentSource(TypedDict, total=False):
"""Contains the content of a document.

Only one of `bytes` or `s3Location` should be specified.
Only one of `bytes`, `content`, `location`, or `text` should be specified.

Attributes:
bytes: The binary content of the document.
content: List of content blocks.
location: Location of the document.
text: Text contents of the document.
"""

bytes: bytes
content: list[DocumentBlockContent]
location: SourceLocation
text: str


class DocumentContent(TypedDict, total=False):
Expand Down
52 changes: 52 additions & 0 deletions tests/strands/models/test_bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -1970,6 +1970,58 @@ def test_format_request_filters_document_content_blocks(model, model_id):
assert "metadata" not in document_block


def test_format_request_document_text_source(model, model_id):
"""Test that document with text source is properly formatted."""
messages = [
{
"role": "user",
"content": [
{
"document": {
"name": "notes.txt",
"format": "txt",
"source": {"text": "plain text content"},
}
},
],
}
]

formatted_request = model._format_request(messages)

document_source = formatted_request["messages"][0]["content"][0]["document"]["source"]
assert document_source == {"text": "plain text content"}


def test_format_request_document_content_source(model, model_id):
"""Test that document with content source is properly formatted, filtering items without text."""
messages = [
{
"role": "user",
"content": [
{
"document": {
"name": "doc.txt",
"format": "txt",
"source": {
"content": [
{"text": "block one"},
{"text": "block two"},
{}, # This should be filtered out
]
},
}
},
],
}
]

formatted_request = model._format_request(messages)

document_source = formatted_request["messages"][0]["content"][0]["document"]["source"]
assert document_source == {"content": [{"text": "block one"}, {"text": "block two"}]}


def test_format_request_filters_nested_reasoning_content(model, model_id):
"""Test deep filtering of nested reasoningText fields."""
messages = [
Expand Down
37 changes: 37 additions & 0 deletions tests/strands/types/test_media.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for media type definitions."""

from strands.types.media import (
DocumentBlockContent,
DocumentSource,
ImageSource,
S3Location,
Expand Down Expand Up @@ -52,6 +53,42 @@ def test_document_source_with_s3_location(self):
assert doc_source["s3Location"]["uri"] == "s3://my-bucket/docs/report.pdf"
assert doc_source["s3Location"]["bucketOwner"] == "123456789012"

def test_document_source_with_text(self):
"""Test DocumentSource with text content."""
doc_source: DocumentSource = {"text": "plain text content"}

assert doc_source["text"] == "plain text content"
assert "bytes" not in doc_source
assert "location" not in doc_source
assert "content" not in doc_source

def test_document_source_with_content(self):
"""Test DocumentSource with content blocks."""
doc_source: DocumentSource = {"content": [{"text": "block one"}, {"text": "block two"}]}

assert len(doc_source["content"]) == 2
assert doc_source["content"][0]["text"] == "block one"
assert doc_source["content"][1]["text"] == "block two"
assert "bytes" not in doc_source
assert "location" not in doc_source
assert "text" not in doc_source


class TestDocumentBlockContent:
"""Tests for DocumentBlockContent TypedDict."""

def test_document_block_content_with_text(self):
"""Test DocumentBlockContent with text field."""
block: DocumentBlockContent = {"text": "hello"}

assert block["text"] == "hello"

def test_document_block_content_empty(self):
"""Test DocumentBlockContent with no fields (total=False)."""
block: DocumentBlockContent = {}

assert "text" not in block


class TestImageSource:
"""Tests for ImageSource TypedDict."""
Expand Down
Loading