From e0bf02029f3515b8aae061a89b39e17a8bd2b8c6 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sat, 16 May 2026 11:45:08 -0700 Subject: [PATCH 1/2] refactor(monitoring): delete per-SND ServiceMonitor wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the per-SeiNodeDeployment ServiceMonitor reconciler. The platform-owned cluster-wide PodMonitor (platform PR #577) covers every seid pod via the `sei.io/role` label, and the per-SND wrapper has been silently broken anyway (selecting an external Service that strips the `metrics` port — confirmed in cross-review). File deletions: - `internal/controller/nodedeployment/monitoring.go` (153 LOC reconciler) - `internal/controller/nodedeployment/monitoring_test.go` (191 LOC tests) - `api/v1alpha1/monitoring_types.go` (`MonitoringConfig` + `ServiceMonitorConfig`) Field/constant removals: - `Monitoring *MonitoringConfig` on `SeiNodeDeploymentSpec` - `ConditionServiceMonitorReady` constant - `+kubebuilder:rbac` marker for `monitoring.coreos.com/servicemonitors` - `reconcileMonitoring(...)` call site in `controller.go` - `serviceMonitorGVK()` from the orphan-GVK slice at `networking.go:404` - `monitoring:` block from the sample `pacific-1-rpc-group.yaml` Regenerated: - `config/crd/sei.io_seinodedeployments.yaml` (drops monitoring schema block) - `manifests/sei.io_seinodedeployments.yaml` (same) - `config/rbac/role.yaml` + `manifests/role.yaml` (drops servicemonitors rule) - `api/v1alpha1/zz_generated.deepcopy.go` (drops MonitoringConfig DeepCopy) Net: -499 LOC, full test suite still green. Post-deploy aftercare: one-shot sweep to clean orphan ServiceMonitor objects that the deleted reconciler will no longer manage: kubectl delete servicemonitor -n pacific-1 \ archive-0 archive-1 archive-2 node-0 shadow-giga syncer-0 Platform manifest cleanup landed first in sei-protocol/platform#579. 
--- api/v1alpha1/monitoring_types.go | 22 -- api/v1alpha1/seinodedeployment_types.go | 6 - api/v1alpha1/zz_generated.deepcopy.go | 47 ----- config/crd/sei.io_seinodedeployments.yaml | 22 -- config/rbac/role.yaml | 12 -- .../controller/nodedeployment/controller.go | 6 - .../controller/nodedeployment/monitoring.go | 153 -------------- .../nodedeployment/monitoring_test.go | 191 ------------------ .../controller/nodedeployment/networking.go | 2 +- manifests/role.yaml | 12 -- .../pacific-1-rpc-group.yaml | 6 - manifests/sei.io_seinodedeployments.yaml | 22 -- 12 files changed, 1 insertion(+), 500 deletions(-) delete mode 100644 api/v1alpha1/monitoring_types.go delete mode 100644 internal/controller/nodedeployment/monitoring.go delete mode 100644 internal/controller/nodedeployment/monitoring_test.go diff --git a/api/v1alpha1/monitoring_types.go b/api/v1alpha1/monitoring_types.go deleted file mode 100644 index 4bf5e6b8..00000000 --- a/api/v1alpha1/monitoring_types.go +++ /dev/null @@ -1,22 +0,0 @@ -package v1alpha1 - -// MonitoringConfig controls observability resources. -type MonitoringConfig struct { - // ServiceMonitor creates a monitoring.coreos.com/v1 ServiceMonitor. - // Presence (non-nil) enables it; set to nil to disable. - // +optional - ServiceMonitor *ServiceMonitorConfig `json:"serviceMonitor,omitempty"` -} - -// ServiceMonitorConfig defines the ServiceMonitor. -type ServiceMonitorConfig struct { - // Interval is the Prometheus scrape interval. - // +optional - // +kubebuilder:default="30s" - // +kubebuilder:validation:Pattern="^[0-9]+(ms|s|m|h)$" - Interval string `json:"interval,omitempty"` - - // Labels are added to the ServiceMonitor metadata. 
- // +optional - Labels map[string]string `json:"labels,omitempty"` -} diff --git a/api/v1alpha1/seinodedeployment_types.go b/api/v1alpha1/seinodedeployment_types.go index 4ca9baa8..18f2c294 100644 --- a/api/v1alpha1/seinodedeployment_types.go +++ b/api/v1alpha1/seinodedeployment_types.go @@ -39,11 +39,6 @@ type SeiNodeDeploymentSpec struct { // +optional Networking *NetworkingConfig `json:"networking,omitempty"` - // Monitoring configures observability resources shared across - // all replicas. - // +optional - Monitoring *MonitoringConfig `json:"monitoring,omitempty"` - // UpdateStrategy controls how changes to the template are rolled out // to child SeiNodes. Every deployment must declare an explicit strategy. UpdateStrategy UpdateStrategy `json:"updateStrategy"` @@ -400,7 +395,6 @@ type RolloutStatus struct { const ( ConditionNodesReady = "NodesReady" ConditionRouteReady = "RouteReady" - ConditionServiceMonitorReady = "ServiceMonitorReady" ConditionGenesisCeremonyComplete = "GenesisCeremonyComplete" ConditionPlanInProgress = "PlanInProgress" ConditionGenesisCeremonyNeeded = "GenesisCeremonyNeeded" diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index efa79705..87d74856 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -318,26 +318,6 @@ func (in *LabelPeerSource) DeepCopy() *LabelPeerSource { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MonitoringConfig) DeepCopyInto(out *MonitoringConfig) { - *out = *in - if in.ServiceMonitor != nil { - in, out := &in.ServiceMonitor, &out.ServiceMonitor - *out = new(ServiceMonitorConfig) - (*in).DeepCopyInto(*out) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MonitoringConfig. 
-func (in *MonitoringConfig) DeepCopy() *MonitoringConfig { - if in == nil { - return nil - } - out := new(MonitoringConfig) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NetworkingConfig) DeepCopyInto(out *NetworkingConfig) { *out = *in @@ -771,11 +751,6 @@ func (in *SeiNodeDeploymentSpec) DeepCopyInto(out *SeiNodeDeploymentSpec) { *out = new(NetworkingConfig) **out = **in } - if in.Monitoring != nil { - in, out := &in.Monitoring, &out.Monitoring - *out = new(MonitoringConfig) - (*in).DeepCopyInto(*out) - } in.UpdateStrategy.DeepCopyInto(&out.UpdateStrategy) } @@ -1040,28 +1015,6 @@ func (in *SeiNodeTemplateMeta) DeepCopy() *SeiNodeTemplateMeta { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ServiceMonitorConfig) DeepCopyInto(out *ServiceMonitorConfig) { - *out = *in - if in.Labels != nil { - in, out := &in.Labels, &out.Labels - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceMonitorConfig. -func (in *ServiceMonitorConfig) DeepCopy() *ServiceMonitorConfig { - if in == nil { - return nil - } - out := new(ServiceMonitorConfig) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ShadowResultConfig) DeepCopyInto(out *ShadowResultConfig) { *out = *in diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 329ad8db..f3401382 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -127,28 +127,6 @@ spec: required: - chainId type: object - monitoring: - description: |- - Monitoring configures observability resources shared across - all replicas. - properties: - serviceMonitor: - description: |- - ServiceMonitor creates a monitoring.coreos.com/v1 ServiceMonitor. - Presence (non-nil) enables it; set to nil to disable. - properties: - interval: - default: 30s - description: Interval is the Prometheus scrape interval. - pattern: ^[0-9]+(ms|s|m|h)$ - type: string - labels: - additionalProperties: - type: string - description: Labels are added to the ServiceMonitor metadata. - type: object - type: object - type: object networking: description: |- Networking enables public networking for the deployment. 
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 108e18d5..009ef916 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -90,18 +90,6 @@ rules: - patch - update - watch -- apiGroups: - - monitoring.coreos.com - resources: - - servicemonitors - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - apiGroups: - sei.io resources: diff --git a/internal/controller/nodedeployment/controller.go b/internal/controller/nodedeployment/controller.go index 60065f5d..879534bb 100644 --- a/internal/controller/nodedeployment/controller.go +++ b/internal/controller/nodedeployment/controller.go @@ -54,7 +54,6 @@ type SeiNodeDeploymentReconciler struct { // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch // +kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=httproutes,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;list;watch;create;update;patch;delete func (r *SeiNodeDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) @@ -117,11 +116,6 @@ func (r *SeiNodeDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Re return planResult, nil } - if err := r.reconcileMonitoring(ctx, group); err != nil { - logger.Error(err, "reconciling monitoring") - return ctrl.Result{}, fmt.Errorf("reconciling monitoring: %w", err) - } - if err := r.updateStatus(ctx, group, statusBase); err != nil { return ctrl.Result{}, fmt.Errorf("updating status: %w", err) } diff --git a/internal/controller/nodedeployment/monitoring.go b/internal/controller/nodedeployment/monitoring.go deleted file mode 100644 index 3d510bc4..00000000 --- a/internal/controller/nodedeployment/monitoring.go +++ /dev/null @@ -1,153 +0,0 @@ -package nodedeployment - -import ( - "context" - "fmt" - - 
corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" -) - -func (r *SeiNodeDeploymentReconciler) reconcileMonitoring(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { - if group.Spec.Monitoring == nil || group.Spec.Monitoring.ServiceMonitor == nil { - removeCondition(group, seiv1alpha1.ConditionServiceMonitorReady) - return r.deleteUnstructured(ctx, group, serviceMonitorGVK()) - } - return r.reconcileServiceMonitor(ctx, group) -} - -func (r *SeiNodeDeploymentReconciler) reconcileServiceMonitor(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { - desired := generateServiceMonitor(group) - if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { - return fmt.Errorf("setting owner reference on ServiceMonitor: %w", err) - } - - //nolint:staticcheck // migrating unstructured SSA to typed ApplyConfiguration is a separate effort - err := r.Patch(ctx, desired, client.Apply, fieldOwner, client.ForceOwnership) - if meta.IsNoMatchError(err) { - if !hasConditionReason(group, seiv1alpha1.ConditionServiceMonitorReady, "CRDNotInstalled") { - r.Recorder.Event(group, corev1.EventTypeWarning, "CRDNotInstalled", "Prometheus Operator CRD (ServiceMonitor) is not installed; monitoring will not be configured") - } - setCondition(group, seiv1alpha1.ConditionServiceMonitorReady, metav1.ConditionFalse, - "CRDNotInstalled", "Prometheus Operator CRD (ServiceMonitor) is not installed") - return nil - } - if err != nil { - return err - } - if !hasConditionReason(group, seiv1alpha1.ConditionServiceMonitorReady, "ServiceMonitorReady") { - r.Recorder.Event(group, corev1.EventTypeNormal, "ServiceMonitorReady", "ServiceMonitor 
reconciled successfully") - } - setCondition(group, seiv1alpha1.ConditionServiceMonitorReady, metav1.ConditionTrue, - "ServiceMonitorReady", "ServiceMonitor reconciled successfully") - return nil -} - -func generateServiceMonitor(group *seiv1alpha1.SeiNodeDeployment) *unstructured.Unstructured { - cfg := group.Spec.Monitoring.ServiceMonitor - - interval := cfg.Interval - if interval == "" { - interval = "30s" - } - - labels := make(map[string]any) - for k, v := range cfg.Labels { - labels[k] = v - } - for k, v := range resourceLabels(group) { - labels[k] = v - } - - sm := &unstructured.Unstructured{ - Object: map[string]any{ - "apiVersion": "monitoring.coreos.com/v1", - "kind": "ServiceMonitor", - "metadata": map[string]any{ - "name": group.Name, - "namespace": group.Namespace, - "labels": labels, - "annotations": toStringInterfaceMap(managedByAnnotations()), - }, - "spec": map[string]any{ - "selector": map[string]any{ - "matchLabels": toStringInterfaceMap(groupSelector(group)), - }, - "endpoints": []any{endpointSpec(group, interval)}, - }, - }, - } - return sm -} - -func endpointSpec(group *seiv1alpha1.SeiNodeDeployment, interval string) map[string]any { - ep := map[string]any{ - "port": "metrics", - "interval": interval, - } - var relabelings []any - if component := deriveComponent(&group.Spec.Template.Spec); component != "" { - relabelings = append(relabelings, map[string]any{ - "action": "replace", - "regex": ".*", - "replacement": component, - "targetLabel": "component", - }) - } - if chainID := group.Spec.Template.Spec.ChainID; chainID != "" { - relabelings = append(relabelings, map[string]any{ - "action": "replace", - "regex": ".*", - "replacement": chainID, - "targetLabel": "chain_id", - }) - } - // EC2-compat shim: stamp legacy labels from `pod` so dashboards that - // filter on instance_name / public_dns work for both EC2 and k8s scrapes. - // TODO(sei-protocol/sei-k8s-controller#122): remove after EC2 scrape decommission. 
- relabelings = append(relabelings, - map[string]any{ - "action": "replace", - "sourceLabels": []any{"pod"}, - "targetLabel": "instance_name", - }, - map[string]any{ - "action": "replace", - "sourceLabels": []any{"pod"}, - "targetLabel": "public_dns", - }, - ) - ep["metricRelabelings"] = relabelings - return ep -} - -// deriveComponent returns the `component` label value for the SND's role, -// or "" if no role is set (caller omits the relabeling in that case). -func deriveComponent(spec *seiv1alpha1.SeiNodeSpec) string { - switch { - case spec.Validator != nil: - return "validator" - case spec.Archive != nil: - return "archive" - case spec.Replayer != nil: - return "replayer" - case spec.FullNode != nil: - return "node" - } - return "" -} - -func serviceMonitorGVK() schema.GroupVersionKind { - return schema.GroupVersionKind{ - Group: "monitoring.coreos.com", - Version: "v1", - Kind: "ServiceMonitor", - } -} diff --git a/internal/controller/nodedeployment/monitoring_test.go b/internal/controller/nodedeployment/monitoring_test.go deleted file mode 100644 index 4b27e154..00000000 --- a/internal/controller/nodedeployment/monitoring_test.go +++ /dev/null @@ -1,191 +0,0 @@ -package nodedeployment - -import ( - "testing" - - . 
"github.com/onsi/gomega" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" -) - -func TestGenerateServiceMonitor_BasicFields(t *testing.T) { - g := NewWithT(t) - group := newTestGroup("archive-rpc", "sei") - group.Spec.Monitoring = &seiv1alpha1.MonitoringConfig{ - ServiceMonitor: &seiv1alpha1.ServiceMonitorConfig{ - Interval: "15s", - }, - } - - sm := generateServiceMonitor(group) - - g.Expect(sm.GetName()).To(Equal("archive-rpc")) - g.Expect(sm.GetNamespace()).To(Equal("sei")) - - spec := sm.Object["spec"].(map[string]any) - selector := spec["selector"].(map[string]any) - matchLabels := selector["matchLabels"].(map[string]any) - g.Expect(matchLabels[groupLabel]).To(Equal("archive-rpc")) - - endpoints := spec["endpoints"].([]any) - g.Expect(endpoints).To(HaveLen(1)) - - ep := endpoints[0].(map[string]any) - g.Expect(ep["port"]).To(Equal("metrics")) - g.Expect(ep["interval"]).To(Equal("15s")) -} - -func TestGenerateServiceMonitor_DefaultInterval(t *testing.T) { - g := NewWithT(t) - group := newTestGroup("archive-rpc", "sei") - group.Spec.Monitoring = &seiv1alpha1.MonitoringConfig{ - ServiceMonitor: &seiv1alpha1.ServiceMonitorConfig{}, - } - - sm := generateServiceMonitor(group) - - spec := sm.Object["spec"].(map[string]any) - endpoints := spec["endpoints"].([]any) - ep := endpoints[0].(map[string]any) - g.Expect(ep["port"]).To(Equal("metrics")) - g.Expect(ep["interval"]).To(Equal("30s")) -} - -func TestGenerateServiceMonitor_CustomLabels(t *testing.T) { - g := NewWithT(t) - group := newTestGroup("archive-rpc", "sei") - group.Spec.Monitoring = &seiv1alpha1.MonitoringConfig{ - ServiceMonitor: &seiv1alpha1.ServiceMonitorConfig{ - Labels: map[string]string{ - "release": "prometheus", - }, - }, - } - - sm := generateServiceMonitor(group) - - metadata := sm.Object["metadata"].(map[string]any) - labels := metadata["labels"].(map[string]any) - g.Expect(labels["release"]).To(Equal("prometheus")) - g.Expect(labels[groupLabel]).To(Equal("archive-rpc")) 
-} - -func TestGenerateServiceMonitor_ComponentRelabeling(t *testing.T) { - cases := []struct { - name string - mutate func(*seiv1alpha1.SeiNodeSpec) - expected string - }{ - {"fullNode", func(s *seiv1alpha1.SeiNodeSpec) {}, "node"}, - {"validator", func(s *seiv1alpha1.SeiNodeSpec) { - s.FullNode = nil - s.Validator = &seiv1alpha1.ValidatorSpec{} - }, "validator"}, - {"archive", func(s *seiv1alpha1.SeiNodeSpec) { - s.FullNode = nil - s.Archive = &seiv1alpha1.ArchiveSpec{} - }, "archive"}, - {"replayer", func(s *seiv1alpha1.SeiNodeSpec) { - s.FullNode = nil - s.Replayer = &seiv1alpha1.ReplayerSpec{} - }, "replayer"}, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - g := NewWithT(t) - group := newTestGroup("role-test", "sei") - group.Spec.Monitoring = &seiv1alpha1.MonitoringConfig{ - ServiceMonitor: &seiv1alpha1.ServiceMonitorConfig{}, - } - tc.mutate(&group.Spec.Template.Spec) - - sm := generateServiceMonitor(group) - spec := sm.Object["spec"].(map[string]any) - ep := spec["endpoints"].([]any)[0].(map[string]any) - relabelings := ep["metricRelabelings"].([]any) - g.Expect(findRelabeling(relabelings, "component")).To(Equal(tc.expected)) - }) - } -} - -func TestGenerateServiceMonitor_ChainIDRelabeling(t *testing.T) { - g := NewWithT(t) - group := newTestGroup("role-test", "sei") - group.Spec.Monitoring = &seiv1alpha1.MonitoringConfig{ - ServiceMonitor: &seiv1alpha1.ServiceMonitorConfig{}, - } - - sm := generateServiceMonitor(group) - spec := sm.Object["spec"].(map[string]any) - ep := spec["endpoints"].([]any)[0].(map[string]any) - relabelings := ep["metricRelabelings"].([]any) - g.Expect(findRelabeling(relabelings, "chain_id")).To(Equal("pacific-1")) -} - -func TestGenerateServiceMonitor_OnlyEC2CompatWhenNothingDerivable(t *testing.T) { - g := NewWithT(t) - group := newTestGroup("role-test", "sei") - group.Spec.Monitoring = &seiv1alpha1.MonitoringConfig{ - ServiceMonitor: &seiv1alpha1.ServiceMonitorConfig{}, - } - 
group.Spec.Template.Spec.FullNode = nil - group.Spec.Template.Spec.ChainID = "" - - sm := generateServiceMonitor(group) - spec := sm.Object["spec"].(map[string]any) - ep := spec["endpoints"].([]any)[0].(map[string]any) - relabelings := ep["metricRelabelings"].([]any) - g.Expect(findRelabeling(relabelings, "component")).To(Equal("")) - g.Expect(findRelabeling(relabelings, "chain_id")).To(Equal("")) - g.Expect(findSourceRelabeling(relabelings, "instance_name")).To(Equal("pod")) - g.Expect(findSourceRelabeling(relabelings, "public_dns")).To(Equal("pod")) -} - -func TestGenerateServiceMonitor_EC2CompatLabels(t *testing.T) { - g := NewWithT(t) - group := newTestGroup("role-test", "sei") - group.Spec.Monitoring = &seiv1alpha1.MonitoringConfig{ - ServiceMonitor: &seiv1alpha1.ServiceMonitorConfig{}, - } - - sm := generateServiceMonitor(group) - spec := sm.Object["spec"].(map[string]any) - ep := spec["endpoints"].([]any)[0].(map[string]any) - relabelings := ep["metricRelabelings"].([]any) - g.Expect(findSourceRelabeling(relabelings, "instance_name")).To(Equal("pod")) - g.Expect(findSourceRelabeling(relabelings, "public_dns")).To(Equal("pod")) -} - -// findRelabeling returns the `replacement` string for a metricRelabeling -// targeting the given label, or "" if no such rule exists. -func findRelabeling(relabelings []any, targetLabel string) string { - for _, r := range relabelings { - rule := r.(map[string]any) - if rule["targetLabel"] != targetLabel { - continue - } - repl, ok := rule["replacement"].(string) - if !ok { - continue - } - return repl - } - return "" -} - -// findSourceRelabeling returns the first sourceLabels entry for a metricRelabeling -// targeting the given label, or "" if no such rule exists. 
-func findSourceRelabeling(relabelings []any, targetLabel string) string { - for _, r := range relabelings { - rule := r.(map[string]any) - if rule["targetLabel"] != targetLabel { - continue - } - src, ok := rule["sourceLabels"].([]any) - if !ok || len(src) == 0 { - continue - } - return src[0].(string) - } - return "" -} diff --git a/internal/controller/nodedeployment/networking.go b/internal/controller/nodedeployment/networking.go index 9ccb1146..806b1b07 100644 --- a/internal/controller/nodedeployment/networking.go +++ b/internal/controller/nodedeployment/networking.go @@ -401,7 +401,7 @@ func (r *SeiNodeDeploymentReconciler) orphanNetworkingResources(ctx context.Cont return fmt.Errorf("fetching external Service for orphan: %w", err) } - for _, gvk := range []schema.GroupVersionKind{httpRouteGVK(), serviceMonitorGVK()} { + for _, gvk := range []schema.GroupVersionKind{httpRouteGVK()} { list := &unstructured.UnstructuredList{} list.SetGroupVersionKind(gvk) listErr := r.List(ctx, list, client.InNamespace(group.Namespace), client.MatchingLabels(resourceLabels(group))) diff --git a/manifests/role.yaml b/manifests/role.yaml index 108e18d5..009ef916 100644 --- a/manifests/role.yaml +++ b/manifests/role.yaml @@ -90,18 +90,6 @@ rules: - patch - update - watch -- apiGroups: - - monitoring.coreos.com - resources: - - servicemonitors - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - apiGroups: - sei.io resources: diff --git a/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml b/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml index d39d0eb8..33174934 100644 --- a/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml +++ b/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml @@ -37,9 +37,3 @@ spec: trustPeriod: "9999h0m0s" networking: {} - - monitoring: - serviceMonitor: - interval: "30s" - labels: - release: prometheus diff --git a/manifests/sei.io_seinodedeployments.yaml 
b/manifests/sei.io_seinodedeployments.yaml index 329ad8db..f3401382 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -127,28 +127,6 @@ spec: required: - chainId type: object - monitoring: - description: |- - Monitoring configures observability resources shared across - all replicas. - properties: - serviceMonitor: - description: |- - ServiceMonitor creates a monitoring.coreos.com/v1 ServiceMonitor. - Presence (non-nil) enables it; set to nil to disable. - properties: - interval: - default: 30s - description: Interval is the Prometheus scrape interval. - pattern: ^[0-9]+(ms|s|m|h)$ - type: string - labels: - additionalProperties: - type: string - description: Labels are added to the ServiceMonitor metadata. - type: object - type: object - type: object networking: description: |- Networking enables public networking for the deployment. From 35f3d4767e71ff5b989f7c453bf09b16dcf5c810 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sat, 16 May 2026 11:53:33 -0700 Subject: [PATCH 2/2] chore: scrub stale ServiceMonitor refs from comments and sample MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final correctness sweep (cross-reviewed by k8s + platform experts) caught 6 stale references the initial deletion left behind: - DeletionPolicy doc in seinodedeployment_types.go said "networking/monitoring resources" — now just networking - controller.go Reconcile comment said "during networking/monitoring reconciliation" — drop /monitoring - labels.go groupSelector doc said "Service, HTTPRoutes, and ServiceMonitor" — SM is gone, just Service and HTTPRoutes - networking.go externalPortsForMode rationale referenced the deleted wrapper's double-scrape bug — rewrite to reference the platform PodMonitor as the current scrape mechanism - noderesource.go deriveRole doc claimed it mirrors nodedeployment.deriveComponent (deleted) — rewrite to describe its actual role: stamp sei.io/role onto pod 
template, lifted by the platform PodMonitor relabeling - pacific-1-rpc-group.yaml sample header said "and Prometheus monitoring" — drop, the SND no longer provisions any monitoring resources. CRD/RBAC regen captures the type-doc change. --- api/v1alpha1/seinodedeployment_types.go | 2 +- config/crd/sei.io_seinodedeployments.yaml | 2 +- internal/controller/nodedeployment/controller.go | 2 +- internal/controller/nodedeployment/labels.go | 6 +++--- internal/controller/nodedeployment/networking.go | 6 +++--- internal/noderesource/noderesource.go | 6 ++++-- .../samples/seinodedeployment/pacific-1-rpc-group.yaml | 6 +++--- manifests/sei.io_seinodedeployments.yaml | 2 +- 8 files changed, 17 insertions(+), 15 deletions(-) diff --git a/api/v1alpha1/seinodedeployment_types.go b/api/v1alpha1/seinodedeployment_types.go index 18f2c294..e3429a41 100644 --- a/api/v1alpha1/seinodedeployment_types.go +++ b/api/v1alpha1/seinodedeployment_types.go @@ -19,7 +19,7 @@ type SeiNodeDeploymentSpec struct { Template SeiNodeTemplate `json:"template"` // DeletionPolicy controls what happens to child SeiNodes and managed - // networking/monitoring resources when the SeiNodeDeployment is deleted. + // networking resources when the SeiNodeDeployment is deleted. // "Delete" (default) cascades deletion. "Retain" orphans children // and networking resources so they continue running independently. // +optional diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index f3401382..c47cc12e 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -62,7 +62,7 @@ spec: default: Delete description: |- DeletionPolicy controls what happens to child SeiNodes and managed - networking/monitoring resources when the SeiNodeDeployment is deleted. + networking resources when the SeiNodeDeployment is deleted. "Delete" (default) cascades deletion. "Retain" orphans children and networking resources so they continue running independently. 
enum: diff --git a/internal/controller/nodedeployment/controller.go b/internal/controller/nodedeployment/controller.go index 879534bb..8b5d1ae1 100644 --- a/internal/controller/nodedeployment/controller.go +++ b/internal/controller/nodedeployment/controller.go @@ -75,7 +75,7 @@ func (r *SeiNodeDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Re } // Snapshot the status before any reconciliation mutates it in memory. - // Conditions set during networking/monitoring reconciliation are captured + // Conditions set during networking reconciliation are captured // in the diff when updateStatus patches against this base. statusBase := client.MergeFromWithOptions(group.DeepCopy(), client.MergeFromWithOptimisticLock{}) ns, name := group.Namespace, group.Name diff --git a/internal/controller/nodedeployment/labels.go b/internal/controller/nodedeployment/labels.go index 051540ec..2787cd3c 100644 --- a/internal/controller/nodedeployment/labels.go +++ b/internal/controller/nodedeployment/labels.go @@ -39,9 +39,9 @@ func externalServiceName(group *seiv1alpha1.SeiNodeDeployment) string { } // groupSelector returns the label selector used by the shared external -// Service, HTTPRoutes, and ServiceMonitor. During an active -// deployment, it includes the revision label to pin traffic to the -// active set. At steady state, it selects by group membership only. +// Service and HTTPRoutes. During an active deployment, it includes the +// revision label to pin traffic to the active set. At steady state, it +// selects by group membership only. 
func groupSelector(group *seiv1alpha1.SeiNodeDeployment) map[string]string { if group.Status.Rollout != nil { return map[string]string{ diff --git a/internal/controller/nodedeployment/networking.go b/internal/controller/nodedeployment/networking.go index 806b1b07..d8322b64 100644 --- a/internal/controller/nodedeployment/networking.go +++ b/internal/controller/nodedeployment/networking.go @@ -100,9 +100,9 @@ func generateExternalService(group *seiv1alpha1.SeiNodeDeployment) *corev1.Servi } // externalPortsForMode returns the public-facing port set — portsForMode -// minus the `metrics` port, which belongs only on the per-pod headless -// Services. Including it on the external Service made ServiceMonitor -// selectors match both, so each pod was scraped twice. +// minus the `metrics` port, which is a private scrape endpoint. The +// platform-owned PodMonitor scrapes pods directly via the pod IP, so +// the metrics port has no reason to surface on the public LB. // // TODO(sei-protocol/sei-config#7): replace with seiconfig.ExternalServicePorts // once that helper lands upstream. Keeping the filter local to the controller diff --git a/internal/noderesource/noderesource.go b/internal/noderesource/noderesource.go index 0dd68e12..fbf76356 100644 --- a/internal/noderesource/noderesource.go +++ b/internal/noderesource/noderesource.go @@ -133,8 +133,10 @@ func ResourceLabels(node *seiv1alpha1.SeiNode) map[string]string { return labels } -// deriveRole mirrors nodedeployment.deriveComponent so the pod label -// and the ServiceMonitor relabel-output stay in lock-step. +// deriveRole returns the role label value for the node's mode. Stamped +// onto the pod template as `sei.io/role` and lifted into the `sei_role` +// metric label by the platform PodMonitor (see +// platform/clusters/*/monitoring/podmonitor-seid.yaml). 
func deriveRole(node *seiv1alpha1.SeiNode) string { switch { case node.Spec.Validator != nil: diff --git a/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml b/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml index 33174934..81751bbb 100644 --- a/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml +++ b/manifests/samples/seinodedeployment/pacific-1-rpc-group.yaml @@ -1,8 +1,8 @@ # SeiNodeDeployment — Pacific-1 RPC Fleet # -# Three full nodes with public Gateway API routing and Prometheus -# monitoring. Each SeiNode bootstraps from an S3 snapshot and -# block-syncs to tip. Genesis is resolved automatically by the sidecar. +# Three full nodes with public Gateway API routing. Each SeiNode +# bootstraps from an S3 snapshot and block-syncs to tip. Genesis is +# resolved automatically by the sidecar. apiVersion: sei.io/v1alpha1 kind: SeiNodeDeployment metadata: diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index f3401382..c47cc12e 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -62,7 +62,7 @@ spec: default: Delete description: |- DeletionPolicy controls what happens to child SeiNodes and managed - networking/monitoring resources when the SeiNodeDeployment is deleted. + networking resources when the SeiNodeDeployment is deleted. "Delete" (default) cascades deletion. "Retain" orphans children and networking resources so they continue running independently. enum: