-
Notifications
You must be signed in to change notification settings - Fork 867
Fix/baggage propagator outbound limits #5163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
cc197af
540f5bd
d16f8a1
03f4a89
24dd624
d2f1cc0
08489a1
e5b34f1
779ca2a
c863d64
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,7 +14,7 @@ | |
| # | ||
| from logging import getLogger | ||
| from re import split | ||
| from typing import Iterable, List, Mapping, Optional, Set | ||
| from typing import Iterable, Iterator, Mapping, Optional, Set | ||
| from urllib.parse import quote_plus, unquote_plus | ||
|
|
||
| from opentelemetry.baggage import _is_valid_pair, get_all, set_baggage | ||
|
|
@@ -26,6 +26,59 @@ | |
| _logger = getLogger(__name__) | ||
|
|
||
|
|
||
| def _filter_valid_entries( | ||
| entries: Iterable[str], | ||
| max_pair_length: int, | ||
| ) -> Iterator[str]: | ||
| for entry in entries: | ||
| if not entry: | ||
| continue | ||
| if not entry.isascii(): | ||
| _logger.warning( | ||
| "Baggage entry with key `%s` contains non-ASCII characters", | ||
| entry.split("=", 1)[0], | ||
| ) | ||
| continue | ||
| if len(entry) > max_pair_length: | ||
| _logger.warning( | ||
| "Baggage entry with key `%s` exceeded the maximum number of bytes per list-member with length %d", | ||
| entry.split("=", 1)[0], | ||
| len(entry), | ||
| ) | ||
| continue | ||
| yield entry | ||
|
|
||
|
|
||
| def _apply_baggage_limits( | ||
| entries: Iterable[str], | ||
| max_pairs: int, | ||
| max_pair_length: int, | ||
| max_header_length: int, | ||
| ) -> Iterator[str]: | ||
| """Apply W3C Baggage size limits to a sequence of baggage entries. | ||
|
|
||
| Yields entries that fit within the W3C specification limits. | ||
| Logs warnings when entries are dropped. | ||
| """ | ||
| length = 0 | ||
| for index, entry in enumerate( | ||
| _filter_valid_entries(entries, max_pair_length) | ||
| ): | ||
| if index >= max_pairs: | ||
| _logger.warning( | ||
| "Baggage exceeded the maximum number of list-members" | ||
| ) | ||
| return | ||
|
|
||
| length += (1 if index > 0 else 0) + len(entry) | ||
| if length > max_header_length: | ||
| _logger.warning( | ||
| "Baggage exceeded the maximum number of bytes per baggage-string" | ||
| ) | ||
| return | ||
| yield entry | ||
|
|
||
|
|
||
| class W3CBaggagePropagator(textmap.TextMapPropagator): | ||
| """Extracts and injects Baggage which is used to annotate telemetry.""" | ||
|
|
||
|
|
@@ -56,31 +109,21 @@ def extract( | |
| if not header: | ||
| return context | ||
|
|
||
| if len(header) > self._MAX_HEADER_LENGTH: | ||
| if len(header.encode()) > self._MAX_HEADER_LENGTH: | ||
| _logger.warning( | ||
| "Baggage header `%s` exceeded the maximum number of bytes per baggage-string", | ||
| header, | ||
| ) | ||
| return context | ||
|
|
||
| baggage_entries: List[str] = split(_DELIMITER_PATTERN, header) | ||
| total_baggage_entries = self._MAX_PAIRS | ||
|
|
||
| if len(baggage_entries) > self._MAX_PAIRS: | ||
| _logger.warning( | ||
| "Baggage header `%s` exceeded the maximum number of list-members", | ||
| header, | ||
| ) | ||
| baggage_entries = split(_DELIMITER_PATTERN, header) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be better to use _apply_baggage_limits here instead of doing it twice below.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I'm not sure what you mean by this? I've also refactored the logic so please take another look to see if the comment is still relevant. |
||
|
|
||
| for entry in baggage_entries: | ||
| if len(entry) > self._MAX_PAIR_LENGTH: | ||
| _logger.warning( | ||
| "Baggage entry `%s` exceeded the maximum number of bytes per list-member", | ||
| entry, | ||
| ) | ||
| continue | ||
| if not entry: # empty string | ||
| continue | ||
| for entry in _apply_baggage_limits( | ||
| baggage_entries, | ||
| max_pairs=self._MAX_PAIRS, | ||
| max_pair_length=self._MAX_PAIR_LENGTH, | ||
| max_header_length=self._MAX_HEADER_LENGTH, | ||
| ): | ||
| try: | ||
| name, value = entry.split("=", 1) | ||
| except Exception: # pylint: disable=broad-exception-caught | ||
|
|
@@ -101,9 +144,6 @@ def extract( | |
| value, | ||
| context=context, | ||
| ) | ||
| total_baggage_entries -= 1 | ||
| if total_baggage_entries == 0: | ||
| break | ||
|
|
||
| return context | ||
|
|
||
|
|
@@ -122,20 +162,30 @@ def inject( | |
| if not baggage_entries: | ||
| return | ||
|
|
||
| baggage_string = _format_baggage(baggage_entries) | ||
| setter.set(carrier, self._BAGGAGE_HEADER_NAME, baggage_string) | ||
| baggage_string = ",".join( | ||
| _apply_baggage_limits( | ||
| _encode_baggage_pairs(baggage_entries), | ||
| max_pairs=self._MAX_PAIRS, | ||
| max_pair_length=self._MAX_PAIR_LENGTH, | ||
| max_header_length=self._MAX_HEADER_LENGTH, | ||
| ) | ||
| ) | ||
|
|
||
| if baggage_string: | ||
| setter.set(carrier, self._BAGGAGE_HEADER_NAME, baggage_string) | ||
|
|
||
| @property | ||
| def fields(self) -> Set[str]: | ||
| """Returns a set with the fields set in `inject`.""" | ||
| return {self._BAGGAGE_HEADER_NAME} | ||
|
|
||
|
|
||
| def _format_baggage(baggage_entries: Mapping[str, object]) -> str: | ||
| return ",".join( | ||
| quote_plus(str(key)) + "=" + quote_plus(str(value)) | ||
| for key, value in baggage_entries.items() | ||
| ) | ||
| def _encode_baggage_pairs( | ||
| baggage_entries: Mapping[str, object], | ||
| ) -> Iterator[str]: | ||
| """Yield URL-encoded 'key=value' pairs from baggage entries.""" | ||
| for key, value in baggage_entries.items(): | ||
| yield quote_plus(str(key)) + "=" + quote_plus(str(value)) | ||
|
|
||
|
|
||
| def _extract_first_element( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to count exact bytes I think you should encode the string to utf-8 first ?
len(s.encode())There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://stackoverflow.com/questions/30686701/python-get-size-of-string-in-bytes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe this would be a problem for just ASCII characters which the baggage spec restricts the key and values to be.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we already validate that it's ASCII by the time this runs?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We do not.
I've added
encodefor the header length check andis_ascii()in the check before per entry length check.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm still slightly fuzzy on how we handle non-ascii. I see in the baggage spec that values should be url encoded
So if we do that, does this code come after? That would change string length FWIW.