Skip to content
Open
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

- `opentelemetry-api`: Enforce W3C Baggage size limits on outbound propagation in `W3CBaggagePropagator.inject()`. Previously only inbound extraction enforced limits; now inject also caps entries at 180, individual pairs at 4096 bytes, and total header at 8192 bytes per the W3C Baggage spec. The extract path max_pairs limit now counts all size-valid entries rather than only successfully parsed ones.
([#5163](https://github.com/open-telemetry/opentelemetry-python/pull/5163))
- `opentelemetry-sdk`: add `additional_properties` support to generated config models via custom `datamodel-codegen` template, enabling plugin/custom component names to flow through typed dataclasses
([#5131](https://github.com/open-telemetry/opentelemetry-python/pull/5131))
- Fix incorrect code example in `create_tracer()` docstring
Expand Down
108 changes: 79 additions & 29 deletions opentelemetry-api/src/opentelemetry/baggage/propagation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#
from logging import getLogger
from re import split
from typing import Iterable, List, Mapping, Optional, Set
from typing import Iterable, Iterator, Mapping, Optional, Set
from urllib.parse import quote_plus, unquote_plus

from opentelemetry.baggage import _is_valid_pair, get_all, set_baggage
Expand All @@ -26,6 +26,59 @@
_logger = getLogger(__name__)


def _filter_valid_entries(
entries: Iterable[str],
max_pair_length: int,
) -> Iterator[str]:
for entry in entries:
if not entry:
continue
if not entry.isascii():
_logger.warning(
"Baggage entry with key `%s` contains non-ASCII characters",
entry.split("=", 1)[0],
)
continue
if len(entry) > max_pair_length:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To count exact bytes, I think you should encode the string to UTF-8 first: `len(s.encode())`.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe this would be a problem for ASCII-only characters, which is what the baggage spec restricts keys and values to.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we already validate that it's ASCII by the time this runs?

Copy link
Copy Markdown
Contributor Author

@lzchen lzchen Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not.

I've added `encode()` for the header length check and an `isascii()` check before the per-entry length check.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still slightly fuzzy on how we handle non-ASCII. I see in the baggage spec that values should be URL-encoded:

Any code points outside of the baggage-octet range MUST be percent-encoded. The percent code point (U+0025) MUST be percent-encoded.

So if we do that, does this code come after? That would change string length FWIW.

_logger.warning(
"Baggage entry with key `%s` exceeded the maximum number of bytes per list-member with length %d",
entry.split("=", 1)[0],
len(entry),
)
continue
yield entry


def _apply_baggage_limits(
    entries: Iterable[str],
    max_pairs: int,
    max_pair_length: int,
    max_header_length: int,
) -> Iterator[str]:
    """Lazily yield baggage entries until a count or size limit is hit.

    Entries are first passed through ``_filter_valid_entries`` together with
    ``max_pair_length``. The surviving entries are then yielded in order.
    Iteration stops, with a logged warning, as soon as either:

    * ``max_pairs`` entries have already been yielded, or
    * the running total of entry lengths, plus one separator character
      between consecutive entries, would exceed ``max_header_length``.

    Once a limit is reached, no later entry is yielded.
    """
    header_size = 0
    emitted = 0
    for candidate in _filter_valid_entries(entries, max_pair_length):
        if emitted >= max_pairs:
            _logger.warning(
                "Baggage exceeded the maximum number of list-members"
            )
            return

        # Every entry after the first costs one extra character for the
        # separator that will precede it in the joined header.
        separator_size = 1 if emitted else 0
        header_size += separator_size + len(candidate)
        if header_size > max_header_length:
            _logger.warning(
                "Baggage exceeded the maximum number of bytes per baggage-string"
            )
            return

        emitted += 1
        yield candidate


class W3CBaggagePropagator(textmap.TextMapPropagator):
"""Extracts and injects Baggage which is used to annotate telemetry."""

Expand Down Expand Up @@ -56,31 +109,21 @@ def extract(
if not header:
return context

if len(header) > self._MAX_HEADER_LENGTH:
if len(header.encode()) > self._MAX_HEADER_LENGTH:
_logger.warning(
"Baggage header `%s` exceeded the maximum number of bytes per baggage-string",
header,
)
return context

baggage_entries: List[str] = split(_DELIMITER_PATTERN, header)
total_baggage_entries = self._MAX_PAIRS

if len(baggage_entries) > self._MAX_PAIRS:
_logger.warning(
"Baggage header `%s` exceeded the maximum number of list-members",
header,
)
baggage_entries = split(_DELIMITER_PATTERN, header)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to use _apply_baggage_limits here instead of doing it twice below.

Copy link
Copy Markdown
Contributor Author

@lzchen lzchen Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I'm not sure what you mean by this. I've also refactored the logic, so please take another look to see if the comment is still relevant.


for entry in baggage_entries:
if len(entry) > self._MAX_PAIR_LENGTH:
_logger.warning(
"Baggage entry `%s` exceeded the maximum number of bytes per list-member",
entry,
)
continue
if not entry: # empty string
continue
for entry in _apply_baggage_limits(
baggage_entries,
max_pairs=self._MAX_PAIRS,
max_pair_length=self._MAX_PAIR_LENGTH,
max_header_length=self._MAX_HEADER_LENGTH,
):
try:
name, value = entry.split("=", 1)
except Exception: # pylint: disable=broad-exception-caught
Expand All @@ -101,9 +144,6 @@ def extract(
value,
context=context,
)
total_baggage_entries -= 1
if total_baggage_entries == 0:
break

return context

Expand All @@ -122,20 +162,30 @@ def inject(
if not baggage_entries:
return

baggage_string = _format_baggage(baggage_entries)
setter.set(carrier, self._BAGGAGE_HEADER_NAME, baggage_string)
baggage_string = ",".join(
_apply_baggage_limits(
_encode_baggage_pairs(baggage_entries),
max_pairs=self._MAX_PAIRS,
max_pair_length=self._MAX_PAIR_LENGTH,
max_header_length=self._MAX_HEADER_LENGTH,
)
)

if baggage_string:
setter.set(carrier, self._BAGGAGE_HEADER_NAME, baggage_string)

@property
def fields(self) -> Set[str]:
"""Returns a set with the fields set in `inject`."""
return {self._BAGGAGE_HEADER_NAME}


def _format_baggage(baggage_entries: Mapping[str, object]) -> str:
return ",".join(
quote_plus(str(key)) + "=" + quote_plus(str(value))
for key, value in baggage_entries.items()
)
def _encode_baggage_pairs(
baggage_entries: Mapping[str, object],
) -> Iterator[str]:
"""Yield URL-encoded 'key=value' pairs from baggage entries."""
for key, value in baggage_entries.items():
yield quote_plus(str(key)) + "=" + quote_plus(str(value))


def _extract_first_element(
Expand Down
4 changes: 2 additions & 2 deletions opentelemetry-api/tests/propagators/test__envcarrier.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,9 +470,9 @@ def test_roundtrip_baggage(self):
self.assertEqual(baggage1, baggage2)

@patch("opentelemetry.baggage.propagation.get_all")
@patch("opentelemetry.baggage.propagation._format_baggage")
def test_fields(self, mock_format_baggage, mock_get_all):
def test_fields(self, mock_get_all):
"""Test that propagator.fields matches injected keys."""
mock_get_all.return_value = {"key": "value"}
mock_setter = Mock()
self.propagator.inject({}, setter=mock_setter)

Expand Down
Loading
Loading