diff --git a/.changelog/5364.fixed b/.changelog/5364.fixed new file mode 100644 index 0000000000..a710b21093 --- /dev/null +++ b/.changelog/5364.fixed @@ -0,0 +1,3 @@ +`opentelemetry-sdk`: `ProcessResourceDetector` no longer collects or emits +`process.command_args` and `process.command_line` by default. Users who depend +on these resource attributes must pass `include_command_args=True`. diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py index d10f84345b..f315e4bbe2 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py @@ -321,7 +321,24 @@ def detect(self) -> "Resource": class ProcessResourceDetector(ResourceDetector): - # pylint: disable=no-self-use + """Detect process resource attributes. + + Args: + raise_on_error: Raise errors from detection instead of logging them. + include_command_args: Include ``process.command_args`` and + ``process.command_line``. These attributes can contain sensitive + command-line values, so they are excluded by default. + """ + + def __init__( + self, + raise_on_error: bool = False, + *, + include_command_args: bool = False, + ) -> None: + super().__init__(raise_on_error=raise_on_error) + self._include_command_args = include_command_args + def detect(self) -> "Resource": _runtime_version = ".".join( map( @@ -340,14 +357,10 @@ def detect(self) -> "Resource": # Use sys.orig_argv, which preserves the original arguments received # by the interpreter. This correctly captures ``python -m `` # invocations where sys.argv is rewritten to the resolved module path - # and the ``-m `` information is lost. sys.orig_argv also - # aligns with /proc//cmdline, which the OTel semantic - # conventions reference for these attributes. - _process_argv = list(sys.orig_argv) - _process_command = _process_argv[0] if _process_argv else "" - _process_command_line = " ".join(_process_argv) - _process_command_args = _process_argv - resource_info = { + # and the ``-m `` information is lost. Only read argv[0] by + # default because full command arguments are opt-in. + _process_command = sys.orig_argv[0] if sys.orig_argv else "" + resource_info: dict[str, AttributeValue] = { PROCESS_RUNTIME_DESCRIPTION: sys.version, PROCESS_RUNTIME_NAME: sys.implementation.name, PROCESS_RUNTIME_VERSION: _runtime_version, @@ -355,9 +368,12 @@ def detect(self) -> "Resource": PROCESS_EXECUTABLE_NAME: _process_executable_name, PROCESS_EXECUTABLE_PATH: _process_executable_path, PROCESS_COMMAND: _process_command, - PROCESS_COMMAND_LINE: _process_command_line, - PROCESS_COMMAND_ARGS: _process_command_args, } + if self._include_command_args: + _process_argv = list(sys.orig_argv) + resource_info[PROCESS_COMMAND_LINE] = " ".join(_process_argv) + resource_info[PROCESS_COMMAND_ARGS] = _process_argv + if hasattr(os, "getppid"): # pypy3 does not have getppid() resource_info[PROCESS_PARENT_PID] = os.getppid() diff --git a/opentelemetry-sdk/tests/resources/test_resources.py b/opentelemetry-sdk/tests/resources/test_resources.py index 2b945ea9fd..f2ca97b60d 100644 --- a/opentelemetry-sdk/tests/resources/test_resources.py +++ b/opentelemetry-sdk/tests/resources/test_resources.py @@ -657,6 +657,26 @@ def test_process_detector(self): self.assertEqual( aggregated_resource.attributes[PROCESS_COMMAND], sys.orig_argv[0] ) + self.assertNotIn( + PROCESS_COMMAND_LINE, + aggregated_resource.attributes, + ) + self.assertNotIn( + PROCESS_COMMAND_ARGS, + aggregated_resource.attributes, + ) + + @patch( + "sys.orig_argv", + ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"], + ) + def test_process_detector_includes_command_args_on_opt_in(self): + initial_resource = Resource({"foo": "bar"}) + aggregated_resource = get_aggregated_resources( + [ProcessResourceDetector(include_command_args=True)], + initial_resource, + ) + self.assertEqual( aggregated_resource.attributes[PROCESS_COMMAND_LINE], " ".join(sys.orig_argv), @@ -666,6 +686,35 @@ def test_process_detector(self): tuple(sys.orig_argv), ) + def test_process_detector_does_not_iterate_orig_argv_by_default(self): + class SensitiveOrigArgv: + def __getitem__(self, index): + if index == 0: + return "uvicorn" + raise AssertionError("unexpected argv access") + + def __iter__(self): + raise AssertionError("unexpected argv iteration") + + with patch("sys.orig_argv", SensitiveOrigArgv()): + aggregated_resource = get_aggregated_resources( + [ProcessResourceDetector()], + Resource({"foo": "bar"}), + ) + + self.assertEqual( + aggregated_resource.attributes[PROCESS_COMMAND], + "uvicorn", + ) + self.assertNotIn( + PROCESS_COMMAND_LINE, + aggregated_resource.attributes, + ) + self.assertNotIn( + PROCESS_COMMAND_ARGS, + aggregated_resource.attributes, + ) + @patch("sys.orig_argv", ["/usr/bin/python", "-m", "myapp"]) def test_process_detector_uses_orig_argv_for_python_m(self): """For ``python -m `` invocations sys.argv[0] is rewritten to @@ -677,6 +726,26 @@ def test_process_detector_uses_orig_argv_for_python_m(self): [ProcessResourceDetector()], Resource({"foo": "bar"}) ) + self.assertEqual( + aggregated_resource.attributes[PROCESS_COMMAND], + "/usr/bin/python", + ) + self.assertNotIn( + PROCESS_COMMAND_LINE, + aggregated_resource.attributes, + ) + self.assertNotIn( + PROCESS_COMMAND_ARGS, + aggregated_resource.attributes, + ) + + @patch("sys.orig_argv", ["/usr/bin/python", "-m", "myapp"]) + def test_process_detector_uses_orig_argv_for_python_m_on_opt_in(self): + aggregated_resource = get_aggregated_resources( + [ProcessResourceDetector(include_command_args=True)], + Resource({"foo": "bar"}), + ) + self.assertEqual( aggregated_resource.attributes[PROCESS_COMMAND], "/usr/bin/python",