From 076adf017f2c2ca112572e8d5d3d5f280027786b Mon Sep 17 00:00:00 2001 From: Sayak Maity Date: Thu, 25 Jun 2026 17:25:26 -0400 Subject: [PATCH 1/5] fix: make Datadog env tag per-cluster (DD_ENV) instead of build-time context --- charts/model-engine/Chart.yaml | 2 +- charts/model-engine/templates/_helpers.tpl | 14 +++++++++----- .../celery_autoscaler_stateful_set.yaml | 2 +- charts/model-engine/values.yaml | 8 ++++++++ .../model_engine_server/common/env_vars.py | 7 +++++++ .../gateways/resources/k8s_resource_types.py | 19 ++++++++++++++++++- 6 files changed, 44 insertions(+), 8 deletions(-) diff --git a/charts/model-engine/Chart.yaml b/charts/model-engine/Chart.yaml index 5f628521..0fe620d0 100644 --- a/charts/model-engine/Chart.yaml +++ b/charts/model-engine/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.6 +version: 0.2.7 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/model-engine/templates/_helpers.tpl b/charts/model-engine/templates/_helpers.tpl index c01fb683..1173e616 100644 --- a/charts/model-engine/templates/_helpers.tpl +++ b/charts/model-engine/templates/_helpers.tpl @@ -51,7 +51,7 @@ Create chart name and version as used by the chart label. team: infra app.kubernetes.io/version: {{ .Values.tag }} tags.datadoghq.com/version: {{ .Values.tag }} -tags.datadoghq.com/env: {{ .Values.context }} +tags.datadoghq.com/env: {{ .Values.datadog.env | default .Values.context }} env: {{ .Values.context }} {{- if .Values.azure }} azure.workload.identity/use: "true" @@ -159,7 +159,7 @@ env: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: {{ .Values.context }} + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -223,7 +223,7 @@ env: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: {{ .Values.context }} + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -296,8 +296,8 @@ env: value: "{{ .Values.dd_trace_enabled }}" - name: DD_REMOTE_CONFIGURATION_ENABLED value: "false" - - name: DD_ENV - value: {{ .Values.context }} + {{- /* DD_ENV is set in the serviceEnvGitTag* wrappers: a Helm value for control-plane + pods, and the ${DD_ENV} runtime substitution for python-rendered endpoints. */}} - name: DD_AGENT_HOST valueFrom: fieldRef: @@ -421,6 +421,8 @@ env: {{- define "modelEngine.serviceEnvGitTagFromHelmVar" }} {{- include "modelEngine.serviceEnvBase" . }} + - name: DD_ENV + value: {{ .Values.datadog.env | default .Values.context }} - name: DD_VERSION value: {{ .Values.tag }} - name: GIT_TAG @@ -432,6 +434,8 @@ env: {{- define "modelEngine.serviceEnvGitTagFromPythonReplace" }} {{- include "modelEngine.serviceEnvBase" . }} + - name: DD_ENV + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: GIT_TAG diff --git a/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml b/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml index 19cfd98c..3c7a0e95 100644 --- a/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml +++ b/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml @@ -1,7 +1,7 @@ {{- if .Values.celery_autoscaler.enabled }} {{- if not .Values.serviceIdentifier }} {{- $app := include "modelEngine.celeryautoscalername" . }} -{{- $env := .Values.context }} +{{- $env := .Values.datadog.env | default .Values.context }} {{- $tag := .Values.tag }} {{- $message_broker := .Values.celeryBrokerType }} {{- $num_shards := .Values.celery_autoscaler.num_shards }} diff --git a/charts/model-engine/values.yaml b/charts/model-engine/values.yaml index bae62ce3..15ba6bf9 100644 --- a/charts/model-engine/values.yaml +++ b/charts/model-engine/values.yaml @@ -1,4 +1,12 @@ dd_trace_enabled: true + +# datadog [optional] configures Datadog tagging for model-engine pods. +datadog: + # env [optional] sets the Datadog `env` tag (DD_ENV + tags.datadoghq.com/env) on both + # control-plane pods and launched inference endpoints. Falls back to `context` when empty. + # Set per-cluster (e.g. "sgp-dev") so pods report the cluster's real environment. + env: "" + spellbook: enabled: false diff --git a/model-engine/model_engine_server/common/env_vars.py b/model-engine/model_engine_server/common/env_vars.py index 7c4626d6..a094a505 100644 --- a/model-engine/model_engine_server/common/env_vars.py +++ b/model-engine/model_engine_server/common/env_vars.py @@ -12,6 +12,7 @@ __all__: Sequence[str] = ( "CIRCLECI", + "DD_ENV", "GIT_TAG", "LAUNCH_SERVICE_TEMPLATE_CONFIG_MAP_PATH", "LAUNCH_SERVICE_TEMPLATE_FOLDER", @@ -96,3 +97,9 @@ def get_boolean_env_var(name: str) -> bool: GIT_TAG: str = os.environ.get("GIT_TAG", "GIT_TAG_NOT_FOUND") if GIT_TAG == "GIT_TAG_NOT_FOUND" and "pytest" not in sys.modules: raise ValueError("GIT_TAG environment variable must be set") + +# DD_ENV is the Datadog `env` tag. It is propagated to launched inference endpoints (via the +# ${DD_ENV} template substitution) so they report the same per-cluster environment as the +# gateway, instead of the build-time `context`. Defaults to infra_config().env when the +# DD_ENV environment variable is not set on the gateway (e.g. local/CI). +DD_ENV: str = os.environ.get("DD_ENV") or infra_config().env diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 7aecf362..ec98628c 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -6,7 +6,12 @@ from model_engine_server.common.config import hmi_config from model_engine_server.common.dtos.model_endpoints import BrokerName, BrokerType from model_engine_server.common.dtos.resource_manager import CreateOrUpdateResourcesRequest -from model_engine_server.common.env_vars import CIRCLECI, GIT_TAG, MODEL_CACHE_PVC_SUFFIX +from model_engine_server.common.env_vars import ( + CIRCLECI, + DD_ENV, + GIT_TAG, + MODEL_CACHE_PVC_SUFFIX, +) from model_engine_server.common.resource_limits import ( FORWARDER_CPU_USAGE, FORWARDER_MEMORY_USAGE, @@ -110,6 +115,7 @@ class _BaseDeploymentArguments(_BaseEndpointArguments): IMAGE: str IMAGE_HASH: str DD_TRACE_ENABLED: str + DD_ENV: str CPUS: str MEMORY: str STORAGE_DICT: DictStrStr @@ -667,6 +673,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -721,6 +728,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -777,6 +785,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -830,6 +839,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -885,6 +895,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -934,6 +945,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -985,6 +997,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1047,6 +1060,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1111,6 +1125,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1168,6 +1183,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1230,6 +1246,7 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), + DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, From ad9c4211ea728c943fdcd838d4c31447f27988e2 Mon Sep 17 00:00:00 2001 From: Sayak Maity Date: Fri, 26 Jun 2026 12:36:32 -0400 Subject: [PATCH 2/5] fix: tag gateway custom metrics with DD_ENV (per-cluster) instead of infra_config().env --- .../infra/gateways/datadog_monitoring_metrics_gateway.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py b/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py index 93a73970..fc29bec1 100644 --- a/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py +++ b/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py @@ -2,7 +2,7 @@ from datadog import statsd from model_engine_server.common.dtos.llms import TokenUsage -from model_engine_server.core.config import infra_config +from model_engine_server.common.env_vars import DD_ENV from model_engine_server.domain.gateways.monitoring_metrics_gateway import ( MetricMetadata, MonitoringMetricsGateway, @@ -23,7 +23,7 @@ def get_model_tags(model_name: Optional[str]) -> List[str]: class DatadogMonitoringMetricsGateway(MonitoringMetricsGateway): def __init__(self, prefix: str = "model_engine"): self.prefix = prefix - self.tags = [f"env:{infra_config().env}"] + self.tags = [f"env:{DD_ENV}"] def emit_attempted_build_metric(self): statsd.increment("scale_launch.service_builder.attempt", tags=self.tags) From 64db255c1d4c495e16544b8baab0cb3c4099f3a6 Mon Sep 17 00:00:00 2001 From: Sayak Maity Date: Fri, 26 Jun 2026 15:10:20 -0400 Subject: [PATCH 3/5] fix: resolve launched-pod DD_ENV per-cluster via central load_k8s_yaml injection --- charts/model-engine/templates/_helpers.tpl | 2 +- .../service_template_config_map.yaml | 5 ++--- .../k8s_endpoint_resource_delegate.py | 5 +++++ .../gateways/resources/k8s_resource_types.py | 19 +------------------ 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/charts/model-engine/templates/_helpers.tpl b/charts/model-engine/templates/_helpers.tpl index 1173e616..5d618c37 100644 --- a/charts/model-engine/templates/_helpers.tpl +++ b/charts/model-engine/templates/_helpers.tpl @@ -95,7 +95,7 @@ owner: ${OWNER} env: {{- .Values.context | printf " %s" }} managed-by: {{- include "modelEngine.fullname" . | printf " %s\n" -}} use_scale_launch_endpoint_network_policy: "true" -tags.datadoghq.com/env: {{- .Values.context | printf " %s" }} +tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} {{- if .Values.azure }} azure.workload.identity/use: "true" diff --git a/charts/model-engine/templates/service_template_config_map.yaml b/charts/model-engine/templates/service_template_config_map.yaml index 34471f2e..95ae3baf 100644 --- a/charts/model-engine/templates/service_template_config_map.yaml +++ b/charts/model-engine/templates/service_template_config_map.yaml @@ -3,7 +3,6 @@ {{- $forwarder_repository := .Values.image.forwarderRepository -}} {{- $triton_repository := .Values.triton.image.repository -}} {{- $triton_tag := .Values.triton.image.tag -}} -{{- $env := .Values.context -}} {{- $service_template_labels := include "modelEngine.serviceTemplateLabels" . }} {{- $job_template_labels := include "modelEngine.jobTemplateLabels" . }} {{- $service_env := include "modelEngine.serviceEnvGitTagFromPythonReplace" . }} @@ -1084,7 +1083,7 @@ data: sidecar.istio.io/inject: "false" version: v1 annotations: - ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:{{ $env }}", "launch_job_id:${JOB_ID}"]}]' + ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:${DD_ENV}", "launch_job_id:${JOB_ID}"]}]' cluster-autoscaler.kubernetes.io/safe-to-evict: "false" spec: restartPolicy: Never @@ -1193,7 +1192,7 @@ data: sidecar.istio.io/inject: "false" version: v1 annotations: - ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:{{ $env }}", "launch_job_id:${JOB_ID}"]}]' + ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:${DD_ENV}", "launch_job_id:${JOB_ID}"]}]' cluster-autoscaler.kubernetes.io/safe-to-evict: "false" spec: restartPolicy: Never diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index f906b4f8..aee7a10d 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -21,6 +21,7 @@ from model_engine_server.common.dtos.resource_manager import CreateOrUpdateResourcesRequest from model_engine_server.common.env_vars import ( CIRCLECI, + DD_ENV, LAUNCH_SERVICE_TEMPLATE_CONFIG_MAP_PATH, LAUNCH_SERVICE_TEMPLATE_FOLDER, MODEL_CACHE_MOUNT_PATH, @@ -277,6 +278,10 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, # K8s/container error at deploy time, rather than a KeyError deep # inside the service-builder celery task. filtered_kwargs = {k: v for k, v in substitution_kwargs.items() if v is not None} + # Inject the Datadog env tag for every launched resource (endpoints, batch jobs, etc.) + # so any ${DD_ENV} in labels / env vars / log configs resolves to the gateway's + # per-cluster env (set via the chart's datadog.env). setdefault lets a caller override. + filtered_kwargs.setdefault("DD_ENV", DD_ENV) yaml_str = Template(template_str).safe_substitute(**filtered_kwargs) try: yaml_obj = yaml.safe_load(yaml_str) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index ec98628c..7aecf362 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -6,12 +6,7 @@ from model_engine_server.common.config import hmi_config from model_engine_server.common.dtos.model_endpoints import BrokerName, BrokerType from model_engine_server.common.dtos.resource_manager import CreateOrUpdateResourcesRequest -from model_engine_server.common.env_vars import ( - CIRCLECI, - DD_ENV, - GIT_TAG, - MODEL_CACHE_PVC_SUFFIX, -) +from model_engine_server.common.env_vars import CIRCLECI, GIT_TAG, MODEL_CACHE_PVC_SUFFIX from model_engine_server.common.resource_limits import ( FORWARDER_CPU_USAGE, FORWARDER_MEMORY_USAGE, @@ -115,7 +110,6 @@ class _BaseDeploymentArguments(_BaseEndpointArguments): IMAGE: str IMAGE_HASH: str DD_TRACE_ENABLED: str - DD_ENV: str CPUS: str MEMORY: str STORAGE_DICT: DictStrStr @@ -673,7 +667,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -728,7 +721,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -785,7 +777,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -839,7 +830,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -895,7 +885,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -945,7 +934,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -997,7 +985,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1060,7 +1047,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1125,7 +1111,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1183,7 +1168,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, @@ -1246,7 +1230,6 @@ def get_endpoint_resource_arguments_from_request( IMAGE=request.image, IMAGE_HASH=image_hash, DD_TRACE_ENABLED=str(dd_trace_enabled), - DD_ENV=DD_ENV, CPUS=str(build_endpoint_request.cpus), MEMORY=str(build_endpoint_request.memory), STORAGE_DICT=storage_dict, From 5064a966bbe2bde3a8b9603ab374b06812d27c94 Mon Sep 17 00:00:00 2001 From: Sayak Maity Date: Fri, 26 Jun 2026 15:32:11 -0400 Subject: [PATCH 4/5] chore: regenerate circleci service-template fallback for DD_ENV substitution --- .../service_template_config_map_circleci.yaml | 128 +++++++++--------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml b/model-engine/model_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml index 8d4c5228..32c99164 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml +++ b/model-engine/model_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml @@ -12,7 +12,7 @@ metadata: tags.datadoghq.com/env: circleci env: circleci product: model-engine - helm.sh/chart: model-engine-0.2.4 + helm.sh/chart: model-engine-0.2.7 app.kubernetes.io/managed-by: Helm annotations: "helm.sh/hook": pre-install,pre-upgrade @@ -33,7 +33,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -68,7 +68,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -135,7 +135,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -306,7 +306,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -341,7 +341,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -408,7 +408,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -533,7 +533,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -561,7 +561,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -630,7 +630,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -807,7 +807,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -835,7 +835,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -904,7 +904,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -1035,7 +1035,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1063,7 +1063,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1140,7 +1140,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -1271,7 +1271,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1306,7 +1306,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1379,7 +1379,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -1552,7 +1552,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1587,7 +1587,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1660,7 +1660,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -1787,7 +1787,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1815,7 +1815,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -1890,7 +1890,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -2069,7 +2069,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2097,7 +2097,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2172,7 +2172,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -2305,7 +2305,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2333,7 +2333,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2416,7 +2416,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -2549,7 +2549,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2571,7 +2571,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2593,7 +2593,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2619,7 +2619,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2637,7 +2637,7 @@ data: metric: name: request-concurrency-average target: - type: Value + type: AverageValue averageValue: ${CONCURRENCY} keda-scaled-object.yaml: |- apiVersion: keda.sh/v1alpha1 @@ -2654,7 +2654,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2701,7 +2701,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2724,7 +2724,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -2797,6 +2797,8 @@ data: - "forwarder.sync.routes=${FORWARDER_SYNC_ROUTES}" - --set - "forwarder.stream.routes=${FORWARDER_STREAM_ROUTES}" + - --set + - "max_concurrency=${FORWARDER_MAX_CONCURRENCY}" env: - name: DD_TRACE_ENABLED value: "${DD_TRACE_ENABLED}" @@ -2805,7 +2807,7 @@ data: - name: DD_SERVICE value: "${ENDPOINT_NAME}" - name: DD_ENV - value: circleci + value: "${DD_ENV}" - name: DD_VERSION value: "${GIT_TAG}" - name: DD_AGENT_HOST @@ -2929,7 +2931,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3038,7 +3040,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3068,7 +3070,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3099,7 +3101,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3130,7 +3132,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3155,7 +3157,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3183,7 +3185,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3222,7 +3224,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} tags.datadoghq.com/service: ${ENDPOINT_NAME} endpoint_id: ${ENDPOINT_ID} @@ -3246,7 +3248,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} launch_job_id: ${JOB_ID} tags.datadoghq.com/request_id: ${REQUEST_ID} @@ -3268,7 +3270,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} launch_job_id: ${JOB_ID} tags.datadoghq.com/request_id: ${REQUEST_ID} @@ -3278,7 +3280,7 @@ data: sidecar.istio.io/inject: "false" version: v1 annotations: - ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:circleci", "launch_job_id:${JOB_ID}"]}]' + ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:${DD_ENV}", "launch_job_id:${JOB_ID}"]}]' cluster-autoscaler.kubernetes.io/safe-to-evict: "false" spec: restartPolicy: Never @@ -3311,8 +3313,6 @@ data: value: "true" - name: DD_REMOTE_CONFIGURATION_ENABLED value: "false" - - name: DD_ENV - value: circleci - name: DD_AGENT_HOST valueFrom: fieldRef: @@ -3352,6 +3352,8 @@ data: value: "600" - name: CIRCLECI value: "true" + - name: DD_ENV + value: ${DD_ENV} - name: DD_VERSION value: ${GIT_TAG} - name: GIT_TAG @@ -3406,7 +3408,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} launch_job_id: ${JOB_ID} tags.datadoghq.com/request_id: ${REQUEST_ID} @@ -3431,7 +3433,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} launch_job_id: ${JOB_ID} tags.datadoghq.com/request_id: ${REQUEST_ID} @@ -3441,7 +3443,7 @@ data: sidecar.istio.io/inject: "false" version: v1 annotations: - ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:circleci", "launch_job_id:${JOB_ID}"]}]' + ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:${DD_ENV}", "launch_job_id:${JOB_ID}"]}]' cluster-autoscaler.kubernetes.io/safe-to-evict: "false" spec: restartPolicy: Never @@ -3473,8 +3475,6 @@ data: value: "true" - name: DD_REMOTE_CONFIGURATION_ENABLED value: "false" - - name: DD_ENV - value: circleci - name: DD_AGENT_HOST valueFrom: fieldRef: @@ -3514,6 +3514,8 @@ data: value: "600" - name: CIRCLECI value: "true" + - name: DD_ENV + value: ${DD_ENV} - name: DD_VERSION value: ${GIT_TAG} - name: GIT_TAG @@ -3586,7 +3588,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} launch_job_id: ${JOB_ID} tags.datadoghq.com/request_id: ${REQUEST_ID} @@ -3611,7 +3613,7 @@ data: env: circleci managed-by: model-engine use_scale_launch_endpoint_network_policy: "true" - tags.datadoghq.com/env: circleci + tags.datadoghq.com/env: ${DD_ENV} tags.datadoghq.com/version: ${GIT_TAG} launch_job_id: ${JOB_ID} tags.datadoghq.com/request_id: ${REQUEST_ID} @@ -3621,7 +3623,7 @@ data: sidecar.istio.io/inject: "false" version: v1 annotations: - ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:circleci", "launch_job_id:${JOB_ID}"]}]' + ad.datadoghq.com/main.logs: '[{"source": "python", "service": "${RESOURCE_NAME}", "tags": ["env:${DD_ENV}", "launch_job_id:${JOB_ID}"]}]' cluster-autoscaler.kubernetes.io/safe-to-evict: "false" spec: restartPolicy: Never @@ -3659,8 +3661,6 @@ data: value: "true" - name: DD_REMOTE_CONFIGURATION_ENABLED value: "false" - - name: DD_ENV - value: circleci - name: DD_AGENT_HOST valueFrom: fieldRef: @@ -3700,6 +3700,8 @@ data: value: "600" - name: CIRCLECI value: "true" + - name: DD_ENV + value: ${DD_ENV} - name: DD_VERSION value: ${GIT_TAG} - name: GIT_TAG From fa9ed7185b2c879961d0730ed28749df21188744 Mon Sep 17 00:00:00 2001 From: Sayak Maity Date: Fri, 26 Jun 2026 15:32:57 -0400 Subject: [PATCH 5/5] fix: copy self.tags in DatadogMonitoringMetricsGateway to prevent tag leakage across emissions --- .../infra/gateways/datadog_monitoring_metrics_gateway.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py b/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py index fc29bec1..ab5da012 100644 --- a/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py +++ b/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py @@ -56,9 +56,8 @@ def emit_database_cache_miss_metric(self): statsd.increment("scale_launch.database_cache.miss", tags=self.tags) def _format_call_tags(self, metadata: MetricMetadata) -> List[str]: - tags = self.tags - tags.extend(get_model_tags(metadata.model_name)) - return tags + # Copy self.tags; extending it in place would leak per-call tags across emissions. + return [*self.tags, *get_model_tags(metadata.model_name)] def emit_route_call_metric(self, route: str, metadata: MetricMetadata): statsd.increment(f"{self.prefix}.{route}.call", tags=self._format_call_tags(metadata)) @@ -87,6 +86,6 @@ def emit_token_count_metrics(self, token_usage: TokenUsage, metadata: MetricMeta statsd.distribution(inter_token_latency, token_usage.inter_token_latency, tags=tags) def emit_http_call_error_metrics(self, endpoint_name: str, error_code: int): - tags = self.tags - tags.extend([f"endpoint_name:{endpoint_name}", f"error_code:{error_code}"]) + # Copy self.tags; extending it in place would leak per-call tags across emissions. + tags = [*self.tags, f"endpoint_name:{endpoint_name}", f"error_code:{error_code}"] statsd.increment(f"{self.prefix}.upstream_sync_error", tags=tags)