From f1489060b91e0632e0d67b1c3e47e92875d67312 Mon Sep 17 00:00:00 2001 From: Amir Rassafi Date: Tue, 28 Apr 2026 17:07:06 +0100 Subject: [PATCH] fix(exporter-otlp-proto-http): include response body in metric exporter error log When the OTLP HTTP metric exporter receives a non-2xx response, it currently logs only the HTTP status code and reason phrase (e.g. "400, Bad Request"), discarding the response body where the collector reports the actual rejection reason (payload too large, invalid attribute, unsupported content-type, etc.). This makes 4xx errors very hard to diagnose without patching the SDK or proxying traffic. Include the response body (truncated to 1024 chars) in the non-retryable failure log and in the retries-exhausted log so the collector's rejection detail is visible to users. --- CHANGELOG.md | 2 + .../proto/http/metric_exporter/__init__.py | 11 ++++- .../metrics/test_otlp_metrics_exporter.py | 46 +++++++++++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 614f240d4e..4a2efce3ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- `opentelemetry-exporter-otlp-proto-http`: include the response body (truncated to 1024 chars) in the metric exporter error log so collector-side rejection reasons are visible to users + ([#XXXX](https://github.com/open-telemetry/opentelemetry-python/pull/XXXX)) - `opentelemetry-sdk`: add `additional_properties` support to generated config models via custom `datamodel-codegen` template, enabling plugin/custom component names to flow through typed dataclasses ([#5131](https://github.com/open-telemetry/opentelemetry-python/pull/5131)) - Fix incorrect code example in `create_tracer()` docstring diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py index efd63b4543..bd9c2886ee 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py @@ -116,6 +116,7 @@ DEFAULT_METRICS_EXPORT_PATH = "v1/metrics" DEFAULT_TIMEOUT = 10 # in seconds _MAX_RETRYS = 6 +_MAX_LOGGED_BODY_CHARS = 1024 class OTLPMetricExporter(MetricExporter, OTLPMetricExporterMixin): @@ -292,16 +293,21 @@ def _export_with_retries( export_error = error retryable = isinstance(error, ConnectionError) status_code = None + body = None else: reason = resp.reason retryable = _is_retryable(resp) status_code = resp.status_code + body = (resp.text or None) if not resp.ok else None + if body is not None and len(body) > _MAX_LOGGED_BODY_CHARS: + body = body[:_MAX_LOGGED_BODY_CHARS] + "...[truncated]" if not retryable: _logger.error( - "Failed to export metrics batch code: %s, reason: %s", + "Failed to export metrics batch code: %s, reason: %s, body: %s", status_code, reason, + body, ) error_attrs = ( {HTTP_RESPONSE_STATUS_CODE: status_code} @@ -318,7 +324,8 @@ def _export_with_retries( ): _logger.error( "Failed to export metrics batch due to timeout, " - "max retries or shutdown." + "max retries or shutdown. last response body: %s", + body, ) error_attrs = ( {HTTP_RESPONSE_STATUS_CODE: status_code} diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py index 5f7ae2afa9..0331842a70 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py @@ -433,6 +433,52 @@ def test_failure(self, mock_post): 401, ) + @patch.object(Session, "post") + def test_failure_logs_response_body(self, mock_post): + resp = Response() + resp.status_code = 400 + resp.reason = "Bad Request" + resp._content = b"resource_metrics: data points exceed message size" + mock_post.return_value = resp + + exporter = OTLPMetricExporter() + + with self.assertLogs(level="ERROR") as logs: + self.assertEqual( + exporter.export(self.metrics["sum_int"]), + MetricExportResult.FAILURE, + ) + + self.assertTrue( + any( + "resource_metrics: data points exceed message size" + in record.getMessage() + for record in logs.records + ), + "Expected response body to appear in error log, " + f"got: {[r.getMessage() for r in logs.records]}", + ) + + @patch.object(Session, "post") + def test_failure_logs_truncates_long_response_body(self, mock_post): + resp = Response() + resp.status_code = 400 + resp.reason = "Bad Request" + resp._content = b"x" * 5000 + mock_post.return_value = resp + + exporter = OTLPMetricExporter() + + with self.assertLogs(level="ERROR") as logs: + self.assertEqual( + exporter.export(self.metrics["sum_int"]), + MetricExportResult.FAILURE, + ) + + joined = " ".join(record.getMessage() for record in logs.records) + self.assertIn("...[truncated]", joined) + self.assertNotIn("x" * 5000, joined) + @patch.object(Session, "post") def test_serialization(self, mock_post): resp = Response()