diff --git a/api/v1alpha1/common_types.go b/api/v1alpha1/common_types.go index cc4c9dd..a1ea02f 100644 --- a/api/v1alpha1/common_types.go +++ b/api/v1alpha1/common_types.go @@ -192,24 +192,21 @@ type SidecarConfig struct { // +optional Resources *corev1.ResourceRequirements `json:"resources,omitempty"` - // TLS, if set, fronts the sidecar API with kube-rbac-proxy on - // :8443 backed by a cert-manager-issued cert. Init-only: - // toggling on a Running SeiNode requires recreation. See seictl#165. + // TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + // using TLS material from a Secret in the SeiNode's namespace. The + // Secret is operator-provisioned; this controller does not create + // it. Immutable. // +optional TLS *SidecarTLSSpec `json:"tls,omitempty"` } -// SidecarTLSSpec configures the cert-manager-issued serving cert for -// the kube-rbac-proxy fronting. +// SidecarTLSSpec references an externally-provisioned TLS Secret. type SidecarTLSSpec struct { - // IssuerName references a cert-manager Issuer or ClusterIssuer - // that signs the proxy's serving certificate. + // SecretName is a kubernetes.io/tls Secret in the SeiNode's + // namespace. The cert SANs must include the DNS names published + // in status.sidecarTLS.requiredDNSNames; the controller validates + // this before allowing the pod to schedule. // +kubebuilder:validation:Required - IssuerName string `json:"issuerName"` - - // IssuerKind is "Issuer" (namespaced) or "ClusterIssuer". - // +kubebuilder:default=ClusterIssuer - // +kubebuilder:validation:Enum=Issuer;ClusterIssuer - // +optional - IssuerKind string `json:"issuerKind,omitempty"` + // +kubebuilder:validation:MinLength=1 + SecretName string `json:"secretName"` } diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index e8ad503..e9444c7 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -10,6 +10,7 @@ import ( // the populated field determines the node's operating mode. // +kubebuilder:validation:XValidation:rule="(has(self.fullNode) ? 1 : 0) + (has(self.archive) ? 1 : 0) + (has(self.replayer) ? 1 : 0) + (has(self.validator) ? 1 : 0) == 1",message="exactly one of fullNode, archive, replayer, or validator must be set" // +kubebuilder:validation:XValidation:rule="!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)",message="peers is required when replayer mode is set" +// +kubebuilder:validation:XValidation:rule="(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" type SeiNodeSpec struct { // ChainID of the chain this node belongs to. // +kubebuilder:validation:MinLength=1 @@ -273,6 +274,11 @@ const ( // pre-flight validation. Only set on SeiNodes with // spec.validator.operatorKeyring. ConditionOperatorKeyringReady = "OperatorKeyringReady" + + // ConditionSidecarTLSSecretReady reports whether the Secret named + // in spec.sidecar.tls.secretName is present, well-formed, and has + // SANs matching status.sidecarTLS.requiredDNSNames. + ConditionSidecarTLSSecretReady = "SidecarTLSSecretReady" ) // Reasons for the ImportPVCReady condition. @@ -303,6 +309,15 @@ const ( ReasonOperatorKeyringInvalid = "OperatorKeyringInvalid" // terminal: fail the plan ) +// Reasons for the SidecarTLSSecretReady condition. +const ( + ReasonTLSSecretReady = "TLSSecretReady" // ready + ReasonTLSSecretNotFound = "TLSSecretNotFound" // Secret missing + ReasonTLSSecretUnavailable = "TLSSecretUnavailable" // transient: API error reading the Secret + ReasonTLSSecretMalformed = "TLSSecretMalformed" // wrong type, empty tls.crt/tls.key, or unparseable cert + ReasonTLSSecretSANsMismatch = "TLSSecretSANsMismatch" // cert.DNSNames missing one or more required SANs +) + // SeiNodeStatus defines the observed state of a SeiNode. type SeiNodeStatus struct { // Phase is the high-level lifecycle state. @@ -343,6 +358,19 @@ type SeiNodeStatus struct { // config so the node advertises a reachable address for gossip discovery. // +optional ExternalAddress string `json:"externalAddress,omitempty"` + + // SidecarTLS is set when spec.sidecar.tls is non-nil. + // +optional + SidecarTLS *SidecarTLSStatus `json:"sidecarTLS,omitempty"` +} + +// SidecarTLSStatus is the controller's view of the referenced TLS Secret. +type SidecarTLSStatus struct { + // SecretName mirrors spec.sidecar.tls.secretName. + SecretName string `json:"secretName"` + + // RequiredDNSNames is the SAN list the cert in SecretName must include. + RequiredDNSNames []string `json:"requiredDNSNames"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index efa7970..c044e9f 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -978,6 +978,11 @@ func (in *SeiNodeStatus) DeepCopyInto(out *SeiNodeStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.SidecarTLS != nil { + in, out := &in.SidecarTLS, &out.SidecarTLS + *out = new(SidecarTLSStatus) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SeiNodeStatus. @@ -1117,6 +1122,26 @@ func (in *SidecarTLSSpec) DeepCopy() *SidecarTLSSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SidecarTLSStatus) DeepCopyInto(out *SidecarTLSStatus) { + *out = *in + if in.RequiredDNSNames != nil { + in, out := &in.RequiredDNSNames, &out.RequiredDNSNames + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SidecarTLSStatus. +func (in *SidecarTLSStatus) DeepCopy() *SidecarTLSStatus { + if in == nil { + return nil + } + out := new(SidecarTLSStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SigningKeySource) DeepCopyInto(out *SigningKeySource) { *out = *in diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 329ad8d..671c58f 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -611,25 +611,21 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or - "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -921,6 +917,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the + SeiNode to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) + ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) + && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)' required: - spec type: object diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index dad2a1c..421e1c4 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -466,24 +466,21 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -769,6 +766,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode + to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) + || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) + && self.sidecar.tls == oldSelf.sidecar.tls)' status: description: SeiNodeStatus defines the observed state of a SeiNode. properties: @@ -996,6 +998,22 @@ spec: items: type: string type: array + sidecarTLS: + description: SidecarTLS is set when spec.sidecar.tls is non-nil. + properties: + requiredDNSNames: + description: RequiredDNSNames is the SAN list the cert in SecretName + must include. + items: + type: string + type: array + secretName: + description: SecretName mirrors spec.sidecar.tls.secretName. + type: string + required: + - requiredDNSNames + - secretName + type: object type: object type: object served: true diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 108e18d..025cbd1 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -67,17 +67,6 @@ rules: - patch - update - watch -- apiGroups: - - cert-manager.io - resources: - - certificates - verbs: - - create - - get - - list - - patch - - update - - watch - apiGroups: - gateway.networking.k8s.io resources: diff --git a/docs/design-seinode-sidecar-tls-toggle-lld.md b/docs/design-seinode-sidecar-tls-toggle-lld.md index 985d0c8..6bc5816 100644 --- a/docs/design-seinode-sidecar-tls-toggle-lld.md +++ b/docs/design-seinode-sidecar-tls-toggle-lld.md @@ -74,11 +74,9 @@ CRD-level immutability: ```go // SeiNodeSpec -// +kubebuilder:validation:XValidation:rule="!has(oldSelf.sidecar.tls) || (has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" +// +kubebuilder:validation:XValidation:rule="(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" ``` -(Exact CEL pending — the rule needs to handle nil sidecar gracefully. May land at the `SidecarConfig` level instead.) - Status additions: ```go @@ -197,9 +195,9 @@ There is no `WaitForSidecarTLSSecret` task in the plan — its job is absorbed i If an operator attempts to mutate `spec.sidecar.tls`, the CRD CEL rejects the API request — no controller code runs. -If the externally-provisioned Secret rotates (cert-manager renewal): kube-rbac-proxy's existing `--tls-reload-interval=30s` flag picks up the new material from the same Secret mount; no pod restart needed; no controller action. The pre-flight validation re-runs on each reconcile and continues stamping `SidecarTLSSecretReady=True` as long as the new cert still has matching SANs. +If the externally-provisioned Secret rotates in place (cert-manager renewal with the same SANs): kube-rbac-proxy's existing `--tls-reload-interval=30s` flag picks up the new material from the same Secret mount; no pod restart needed; no controller action. Pre-flight re-runs each reconcile and continues stamping `SidecarTLSSecretReady=True`. -If the Secret SAN coverage *changes* such that the contract breaks (e.g., wrong SANs after a misconfigured re-issuance): pre-flight flips the condition to `SidecarTLSSecretReady=False, Reason=SANsMismatch`. The Running node continues to serve traffic with the now-wrong cert (kube-rbac-proxy doesn't care about controller-side validation); operators get a visible signal via the condition and can fix the cert. The pod doesn't cycle; this is observability, not enforcement. +If the Secret SAN coverage breaks (wrong SANs after a misconfigured re-issuance, Secret deleted, etc.): pre-flight flips the condition to `SidecarTLSSecretReady=False`. Plan creation is gated — any plan that fires under a broken Secret would eventually cycle the pod (image-drift NodeUpdate plans always do; even a mark-ready replan ultimately retries the sidecar HTTP call through the proxy), and a cycled pod with a missing or mis-SAN'd Secret won't bind cleanly. The Running node keeps serving on whatever cert kube-rbac-proxy is already bound to (no controller action can force a reload-from-bad-Secret), so the user-visible effect is "running pod stays as-is, no new rollouts until the operator fixes the Secret." ## 5. Operator workflow for "enable TLS on existing fleet" diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 2f704ff..771e8d4 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -62,7 +62,6 @@ type SeiNodeReconciler struct { // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=cert-manager.io,resources=certificates,verbs=get;list;watch;create;update;patch // Reconcile drives the SeiNode lifecycle. All status mutations after the // finalizer are accumulated in-memory and flushed in a single status patch. @@ -100,17 +99,21 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct statusBase := client.MergeFromWithOptions(before, client.MergeFromWithOptimisticLock{}) observedPhase := node.Status.Phase prevSidecar := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarReady) + prevTLSSecret := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) if err := r.reconcilePeers(ctx, node); err != nil { return ctrl.Result{}, fmt.Errorf("reconciling peers: %w", err) } + r.reconcileSidecarTLSReady(ctx, node) + planAlreadyActive := node.Status.Plan != nil && node.Status.Plan.Phase == seiv1alpha1.TaskPlanActive if err := r.Planner.ResolvePlan(ctx, node); err != nil { return ctrl.Result{}, fmt.Errorf("resolving plan: %w", err) } r.emitSidecarReadinessEvent(node, prevSidecar) + r.emitSidecarTLSSecretEvent(node, prevTLSSecret) var result ctrl.Result var execErr error @@ -170,6 +173,16 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{RequeueAfter: statusPollInterval}, nil } + // Pending nodes blocked on the SidecarTLSSecretReady gate poll for + // the Secret appearing. Without this the controller would wait on the + // informer resync (~10h) since the builder does not Watches() Secrets. + if noderesource.SidecarTLSEnabled(node) { + c := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + if c == nil || c.Status != metav1.ConditionTrue { + return ctrl.Result{RequeueAfter: statusPollInterval}, nil + } + } + return result, nil } @@ -255,3 +268,20 @@ func (r *SeiNodeReconciler) emitSidecarReadinessEvent(node *seiv1alpha1.SeiNode, "sidecar Healthz returned 200; mark-ready gate is open") } } + +func (r *SeiNodeReconciler) emitSidecarTLSSecretEvent(node *seiv1alpha1.SeiNode, prev *metav1.Condition) { + cur := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + if cur == nil { + return + } + switch { + case cur.Status == metav1.ConditionFalse && + (prev == nil || prev.Status != metav1.ConditionFalse): + r.Recorder.Eventf(node, corev1.EventTypeWarning, "SidecarTLSSecretNotReady", + "sidecar TLS Secret %q: %s", node.Spec.Sidecar.TLS.SecretName, cur.Message) + case cur.Status == metav1.ConditionTrue && + prev != nil && prev.Status == metav1.ConditionFalse: + r.Recorder.Event(node, corev1.EventTypeNormal, "SidecarTLSSecretReady", + "sidecar TLS Secret validated; plan gate is open") + } +} diff --git a/internal/controller/node/preflight_sidecar_tls.go b/internal/controller/node/preflight_sidecar_tls.go new file mode 100644 index 0000000..575cec7 --- /dev/null +++ b/internal/controller/node/preflight_sidecar_tls.go @@ -0,0 +1,134 @@ +package node + +import ( + "context" + "crypto/x509" + "encoding/pem" + "fmt" + "slices" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" +) + +// reconcileSidecarTLSReady publishes status.sidecarTLS and sets the +// SidecarTLSSecretReady condition based on the referenced Secret's +// presence + cert validity. Clears both when TLS is disabled. +// Mutations are in-memory; caller's Status().Patch flushes. +// +// Reads via the cached client. Reconcile.statusPollInterval covers the +// "Secret arrives after SeiNode" case — the controller does not Watches() +// Secrets, so an explicit requeue loop drives convergence. +func (r *SeiNodeReconciler) reconcileSidecarTLSReady(ctx context.Context, node *seiv1alpha1.SeiNode) { + if !noderesource.SidecarTLSEnabled(node) { + apimeta.RemoveStatusCondition(&node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + node.Status.SidecarTLS = nil + return + } + + required := requiredDNSNames(node) + node.Status.SidecarTLS = &seiv1alpha1.SidecarTLSStatus{ + SecretName: node.Spec.Sidecar.TLS.SecretName, + RequiredDNSNames: required, + } + + reason, msg := validateTLSSecret(ctx, r.Client, node, required) + status := metav1.ConditionFalse + if reason == seiv1alpha1.ReasonTLSSecretReady { + status = metav1.ConditionTrue + } + apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionSidecarTLSSecretReady, + Status: status, + Reason: reason, + Message: msg, + ObservedGeneration: node.Generation, + }) +} + +// requiredDNSNames returns the headless service + pod-0 DNS names the +// proxy is reached on from inside the cluster. +func requiredDNSNames(node *seiv1alpha1.SeiNode) []string { + return []string{ + fmt.Sprintf("%s.%s.svc.cluster.local", node.Name, node.Namespace), + fmt.Sprintf("%s-0.%s.%s.svc.cluster.local", node.Name, node.Name, node.Namespace), + } +} + +// validateTLSSecret returns the SidecarTLSSecretReady reason + message +// for the Secret named in spec.sidecar.tls.secretName. Required SANs +// must appear in cert.DNSNames; Subject.CommonName, IP SANs, and URI +// SANs do not satisfy the contract. +func validateTLSSecret( + ctx context.Context, + reader client.Reader, + node *seiv1alpha1.SeiNode, + required []string, +) (reason, msg string) { + name := node.Spec.Sidecar.TLS.SecretName + + secret := &corev1.Secret{} + key := types.NamespacedName{Name: name, Namespace: node.Namespace} + if err := reader.Get(ctx, key, secret); err != nil { + if apierrors.IsNotFound(err) { + return seiv1alpha1.ReasonTLSSecretNotFound, + fmt.Sprintf("secret %q not found in namespace %q", name, node.Namespace) + } + return seiv1alpha1.ReasonTLSSecretUnavailable, + fmt.Sprintf("transient error getting Secret %q: %v", name, err) + } + + if secret.Type != corev1.SecretTypeTLS { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q has type %q; want %q", name, secret.Type, corev1.SecretTypeTLS) + } + + crtPEM := secret.Data[corev1.TLSCertKey] + keyPEM := secret.Data[corev1.TLSPrivateKeyKey] + if len(crtPEM) == 0 { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q has empty %s", name, corev1.TLSCertKey) + } + if len(keyPEM) == 0 { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q has empty %s", name, corev1.TLSPrivateKeyKey) + } + + block, _ := pem.Decode(crtPEM) + if block == nil { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q %s is not valid PEM", name, corev1.TLSCertKey) + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q %s does not parse: %v", name, corev1.TLSCertKey, err) + } + + if missing := missingSANs(cert.DNSNames, required); len(missing) > 0 { + return seiv1alpha1.ReasonTLSSecretSANsMismatch, + fmt.Sprintf("secret %q cert SANs missing required DNS names: %v (cert has: %v)", + name, missing, cert.DNSNames) + } + + return seiv1alpha1.ReasonTLSSecretReady, + fmt.Sprintf("secret %q is well-formed and SANs cover required DNS names", name) +} + +// missingSANs returns the elements of required that are absent from have. +func missingSANs(have, required []string) []string { + var missing []string + for _, r := range required { + if !slices.Contains(have, r) { + missing = append(missing, r) + } + } + return missing +} diff --git a/internal/controller/node/preflight_sidecar_tls_test.go b/internal/controller/node/preflight_sidecar_tls_test.go new file mode 100644 index 0000000..0769091 --- /dev/null +++ b/internal/controller/node/preflight_sidecar_tls_test.go @@ -0,0 +1,336 @@ +package node + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "math/big" + "testing" + "time" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" +) + +const ( + tlsTestChainID = "atlantic-2" + tlsTestImage = "ghcr.io/sei-protocol/seid:latest" +) + +// makeSelfSignedCertPEM produces a PEM-encoded ECDSA P-256 self-signed +// certificate with the given DNS SANs. Cryptographic verification is not +// the controller's concern — the preflight check only parses the cert and +// inspects DNSNames — so a self-signed leaf is sufficient test material. +func makeSelfSignedCertPEM(t *testing.T, dnsNames []string) []byte { + t.Helper() + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("generating ECDSA key: %v", err) + } + tmpl := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "test-leaf"}, + NotBefore: time.Now().Add(-1 * time.Hour), + NotAfter: time.Now().Add(24 * time.Hour), + DNSNames: dnsNames, + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key) + if err != nil { + t.Fatalf("creating cert: %v", err) + } + return pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}) +} + +func tlsTestNode() *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: "tls-node", Namespace: "ns-1"}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: tlsTestChainID, + Image: tlsTestImage, + FullNode: &seiv1alpha1.FullNodeSpec{}, + Sidecar: &seiv1alpha1.SidecarConfig{ + TLS: &seiv1alpha1.SidecarTLSSpec{SecretName: "tls-node-cert"}, + }, + }, + } +} + +func tlsTestNodeNoTLS() *seiv1alpha1.SeiNode { + n := tlsTestNode() + n.Spec.Sidecar = nil + return n +} + +func tlsSecret(name, namespace string, crt, key []byte, secretType corev1.SecretType) *corev1.Secret { + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Type: secretType, + Data: map[string][]byte{ + corev1.TLSCertKey: crt, + corev1.TLSPrivateKeyKey: key, + }, + } +} + +func tlsReader(t *testing.T, objs ...client.Object) client.Reader { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + return fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() +} + +// --- requiredDNSNames --- + +func TestRequiredDNSNames_ServiceAndPod0(t *testing.T) { + g := NewWithT(t) + got := requiredDNSNames(tlsTestNode()) + g.Expect(got).To(Equal([]string{ + "tls-node.ns-1.svc.cluster.local", + "tls-node-0.tls-node.ns-1.svc.cluster.local", + })) +} + +func TestRequiredDNSNames_DifferentNamespace(t *testing.T) { + g := NewWithT(t) + n := tlsTestNode() + n.Namespace = "eng-brandon" + n.Name = "validator-0" + got := requiredDNSNames(n) + g.Expect(got).To(Equal([]string{ + "validator-0.eng-brandon.svc.cluster.local", + "validator-0-0.validator-0.eng-brandon.svc.cluster.local", + })) +} + +// --- validateTLSSecret matrix --- + +func TestValidateTLSSecret_Ready(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, required) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, required) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretReady)) + g.Expect(msg).To(ContainSubstring("well-formed")) +} + +func TestValidateTLSSecret_NotFound(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + reason, msg := validateTLSSecret(context.Background(), tlsReader(t), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretNotFound)) + g.Expect(msg).To(ContainSubstring("not found")) +} + +func TestValidateTLSSecret_WrongType(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + []byte("c"), []byte("k"), corev1.SecretTypeOpaque) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring("type")) +} + +func TestValidateTLSSecret_EmptyCert(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + []byte(""), []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring(corev1.TLSCertKey)) +} + +func TestValidateTLSSecret_EmptyKey(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + makeSelfSignedCertPEM(t, requiredDNSNames(node)), []byte(""), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring(corev1.TLSPrivateKeyKey)) +} + +func TestValidateTLSSecret_NotPEM(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + []byte("not-a-pem-block"), []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring("PEM")) +} + +func TestValidateTLSSecret_PEMButUnparseable(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + garbage := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: []byte("\x00\x01\x02\x03")}) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + garbage, []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring("does not parse")) +} + +func TestValidateTLSSecret_SANsMismatch(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + // Cert has the wrong DNS names — covers neither required SAN. + crt := makeSelfSignedCertPEM(t, []string{"unrelated.example.com"}) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretSANsMismatch)) + g.Expect(msg).To(ContainSubstring("missing")) +} + +func TestValidateTLSSecret_PartialSANsMismatch(t *testing.T) { + // Cert covers the service SAN but not the pod-0 SAN — still a mismatch. + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, []string{required[0]}) // only the service entry + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, _ := validateTLSSecret(context.Background(), tlsReader(t, sec), node, required) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretSANsMismatch)) +} + +func TestValidateTLSSecret_SupersetSANsOK(t *testing.T) { + // Cert covers required SANs plus extras — still Ready. + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, append([]string{"extra.example.com"}, required...)) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, _ := validateTLSSecret(context.Background(), tlsReader(t, sec), node, required) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretReady)) +} + +// --- reconcileSidecarTLSReady lifecycle --- + +func tlsReconciler(t *testing.T, objs ...client.Object) *SeiNodeReconciler { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() + return &SeiNodeReconciler{Client: c, Scheme: s} +} + +func TestReconcileSidecarTLSReady_TLSDisabled_ClearsConditionAndStatus(t *testing.T) { + g := NewWithT(t) + node := tlsTestNodeNoTLS() + // Seed stale state to verify it's cleared. + node.Status.SidecarTLS = &seiv1alpha1.SidecarTLSStatus{SecretName: "stale", RequiredDNSNames: []string{"stale.example"}} + apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionSidecarTLSSecretReady, Status: metav1.ConditionTrue, Reason: seiv1alpha1.ReasonTLSSecretReady, + }) + + r := tlsReconciler(t) + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).To(BeNil()) + g.Expect(apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady)).To(BeNil()) +} + +func TestReconcileSidecarTLSReady_TLSEnabledSecretReady_SetsConditionTrue(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, required) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + r := tlsReconciler(t, sec) + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).NotTo(BeNil()) + g.Expect(node.Status.SidecarTLS.SecretName).To(Equal(node.Spec.Sidecar.TLS.SecretName)) + g.Expect(node.Status.SidecarTLS.RequiredDNSNames).To(Equal(required)) + + cond := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonTLSSecretReady)) +} + +func TestReconcileSidecarTLSReady_TLSEnabledSecretMissing_SetsConditionFalse(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + + r := tlsReconciler(t) // no Secret seeded + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).NotTo(BeNil(), "status must still publish the required-SANs contract") + cond := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonTLSSecretNotFound)) +} + +func TestReconcileSidecarTLSReady_SANsMismatch_SetsConditionFalse(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + crt := makeSelfSignedCertPEM(t, []string{"wrong.example.com"}) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + r := tlsReconciler(t, sec) + r.reconcileSidecarTLSReady(context.Background(), node) + + cond := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonTLSSecretSANsMismatch)) +} + +func TestReconcileSidecarTLSReady_TransitionEnabledToDisabled_Cleans(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, required) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + r := tlsReconciler(t, sec) + r.reconcileSidecarTLSReady(context.Background(), node) + g.Expect(node.Status.SidecarTLS).NotTo(BeNil()) + + // CEL blocks the in-place mutation in prod; this exercises the + // defensive cleanup branch. + node.Spec.Sidecar = nil + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).To(BeNil()) + g.Expect(apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady)).To(BeNil()) +} diff --git a/internal/noderesource/noderesource.go b/internal/noderesource/noderesource.go index 0dd68e1..424ef3c 100644 --- a/internal/noderesource/noderesource.go +++ b/internal/noderesource/noderesource.go @@ -14,7 +14,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" @@ -951,11 +950,13 @@ func SidecarTLSEnabled(node *seiv1alpha1.SeiNode) bool { return node.Spec.Sidecar != nil && node.Spec.Sidecar.TLS != nil } -// SidecarTLSSecretName is the cert-manager-managed Secret carrying the -// proxy's TLS material. Exported because plan tasks reference it when -// emitting the Certificate. +// SidecarTLSSecretName returns spec.sidecar.tls.secretName, or empty +// when TLS is disabled. Callers gate on SidecarTLSEnabled. func SidecarTLSSecretName(node *seiv1alpha1.SeiNode) string { - return node.Name + "-sidecar-tls" + if !SidecarTLSEnabled(node) { + return "" + } + return node.Spec.Sidecar.TLS.SecretName } // RBACProxyConfigMapName is the ConfigMap carrying the proxy's @@ -1119,38 +1120,6 @@ func proxyReadinessProbe() *corev1.Probe { } } -// GenerateSidecarCertificate returns an unstructured Certificate so -// the controller avoids depending on cert-manager Go types — the -// three fields we need (apiVersion, kind, spec) are stable. -func GenerateSidecarCertificate(node *seiv1alpha1.SeiNode) *unstructured.Unstructured { - if !SidecarTLSEnabled(node) { - return nil - } - tls := node.Spec.Sidecar.TLS - svcDNS := fmt.Sprintf("%s.%s.svc.cluster.local", node.Name, node.Namespace) - pod0DNS := fmt.Sprintf("%s-0.%s.%s.svc.cluster.local", node.Name, node.Name, node.Namespace) - - obj := &unstructured.Unstructured{} - obj.SetAPIVersion("cert-manager.io/v1") - obj.SetKind("Certificate") - obj.SetName(SidecarTLSSecretName(node)) - obj.SetNamespace(node.Namespace) - obj.SetLabels(ResourceLabels(node)) - _ = unstructured.SetNestedMap(obj.Object, map[string]any{ - "secretName": SidecarTLSSecretName(node), - "duration": "2160h", // 90 days - "renewBefore": "360h", // 15 days - "commonName": svcDNS, - "dnsNames": []any{svcDNS, pod0DNS}, - "issuerRef": map[string]any{ - "name": tls.IssuerName, - "kind": tls.IssuerKind, - "group": "cert-manager.io", - }, - }, "spec") - return obj -} - // GenerateRBACProxyConfigMap produces the ConfigMap carrying the // kube-rbac-proxy authorization config — a single coarse SAR scoped // to (group=sei.io, resource=seinodetasks, namespace=, name=). diff --git a/internal/noderesource/sidecar_tls_test.go b/internal/noderesource/sidecar_tls_test.go index 95953e2..02d97e6 100644 --- a/internal/noderesource/sidecar_tls_test.go +++ b/internal/noderesource/sidecar_tls_test.go @@ -13,8 +13,7 @@ import ( func withSidecarTLS(node *seiv1alpha1.SeiNode) *seiv1alpha1.SeiNode { node.Spec.Sidecar.TLS = &seiv1alpha1.SidecarTLSSpec{ - IssuerName: "validator-ca", - IssuerKind: "ClusterIssuer", + SecretName: node.Name + "-tls", } return node } @@ -159,27 +158,32 @@ func TestServicePorts_NoAPIPortWithoutTLS(t *testing.T) { } } -func TestGenerateSidecarCertificate_NilWithoutTLS(t *testing.T) { +func TestSidecarTLSSecretName_ReadsFromSpec(t *testing.T) { g := NewWithT(t) - g.Expect(GenerateSidecarCertificate(newGenesisNode("a", "default"))).To(BeNil()) + g.Expect(SidecarTLSSecretName(newGenesisNode("a", "default"))).To(Equal(""), + "empty when TLS disabled") + + node := withSidecarTLS(newGenesisNode("a", "default")) + g.Expect(SidecarTLSSecretName(node)).To(Equal("a-tls"), + "returns spec.sidecar.tls.secretName when TLS enabled") } -func TestGenerateSidecarCertificate_HappyPath(t *testing.T) { +func TestPodSpec_TLSVolumeUsesSecretNameFromSpec(t *testing.T) { g := NewWithT(t) - cert := GenerateSidecarCertificate(withSidecarTLS(newGenesisNode("a", "default"))) - g.Expect(cert).NotTo(BeNil()) - g.Expect(cert.GetAPIVersion()).To(Equal("cert-manager.io/v1")) - g.Expect(cert.GetKind()).To(Equal("Certificate")) - g.Expect(cert.GetName()).To(Equal("a-sidecar-tls")) - - spec, ok := unstructuredMap(cert, "spec") - g.Expect(ok).To(BeTrue()) - g.Expect(spec["secretName"]).To(Equal("a-sidecar-tls")) - g.Expect(spec["commonName"]).To(Equal("a.default.svc.cluster.local")) - - issuerRef, _ := spec["issuerRef"].(map[string]any) - g.Expect(issuerRef["name"]).To(Equal("validator-ca")) - g.Expect(issuerRef["kind"]).To(Equal("ClusterIssuer")) + node := withSidecarTLS(newGenesisNode("a", "default")) + node.Spec.Sidecar.TLS.SecretName = "custom-cert-secret" + sts := mustGenerateStatefulSet(t, node, platformtest.Config()) + + var tlsVol *corev1.Volume + for i := range sts.Spec.Template.Spec.Volumes { + if sts.Spec.Template.Spec.Volumes[i].Name == sidecarTLSVolumeName { + tlsVol = &sts.Spec.Template.Spec.Volumes[i] + break + } + } + g.Expect(tlsVol).NotTo(BeNil()) + g.Expect(tlsVol.Secret).NotTo(BeNil()) + g.Expect(tlsVol.Secret.SecretName).To(Equal("custom-cert-secret")) } func TestGenerateRBACProxyConfigMap_NilWithoutTLS(t *testing.T) { @@ -260,7 +264,3 @@ func TestRollback_RemovesProxyResources(t *testing.T) { } } -func unstructuredMap(u interface{ UnstructuredContent() map[string]any }, key string) (map[string]any, bool) { - v, ok := u.UnstructuredContent()[key].(map[string]any) - return v, ok -} diff --git a/internal/planner/bootstrap.go b/internal/planner/bootstrap.go index 0902a76..be1853b 100644 --- a/internal/planner/bootstrap.go +++ b/internal/planner/bootstrap.go @@ -99,10 +99,6 @@ func buildBootstrapPlan( // Phase 4: Create production StatefulSet and Service (after bootstrap teardown frees the PVC) if noderesource.SidecarTLSEnabled(node) { - if err := appendTask(task.TaskTypeApplySidecarCert, - &task.ApplySidecarCertParams{NodeName: node.Name, Namespace: node.Namespace}); err != nil { - return nil, err - } if err := appendTask(task.TaskTypeApplyRBACProxyConfig, &task.ApplyRBACProxyConfigParams{NodeName: node.Name, Namespace: node.Namespace}); err != nil { return nil, err @@ -187,7 +183,7 @@ func buildGenesisPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) prog := []string{task.TaskTypeEnsureDataPVC} if noderesource.SidecarTLSEnabled(node) { - prog = append(prog, task.TaskTypeApplySidecarCert, task.TaskTypeApplyRBACProxyConfig) + prog = append(prog, task.TaskTypeApplyRBACProxyConfig) } prog = append(prog, task.TaskTypeApplyStatefulSet, diff --git a/internal/planner/planner.go b/internal/planner/planner.go index d6fe745..08a7b25 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -134,6 +134,12 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod handleTerminalPlan(ctx, node) + // Gate plan creation on the TLS Secret. Any plan (init or image- + // drift rollout) eventually cycles the pod, which mounts the Secret. + if noderesource.SidecarTLSEnabled(node) && !sidecarTLSSecretReady(node) { + return nil + } + mode, err := plannerForMode(node) if err != nil { return err @@ -159,6 +165,12 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod return nil } +// sidecarTLSSecretReady returns true iff the condition is present and True. +func sidecarTLSSecretReady(node *seiv1alpha1.SeiNode) bool { + cond := meta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + return cond != nil && cond.Status == metav1.ConditionTrue +} + // handleTerminalPlan handles completed or failed plans: clears conditions // and nils the plan so the planner can build the next one if needed. func handleTerminalPlan(ctx context.Context, node *seiv1alpha1.SeiNode) { @@ -529,9 +541,7 @@ func buildBasePlan( prog = append(prog, task.TaskTypeValidateOperatorKeyring) } if noderesource.SidecarTLSEnabled(node) { - // Emit Cert + ConfigMap before pod schedules. Cert-manager - // is async; kubelet retries Secret mounts. - prog = append(prog, task.TaskTypeApplySidecarCert, task.TaskTypeApplyRBACProxyConfig) + prog = append(prog, task.TaskTypeApplyRBACProxyConfig) } prog = append(prog, task.TaskTypeApplyStatefulSet, task.TaskTypeApplyService) prog = append(prog, sidecarProg...) @@ -574,8 +584,6 @@ func paramsForTaskType( return &task.ApplyServiceParams{NodeName: node.Name, Namespace: node.Namespace} case task.TaskTypeApplyRBACProxyConfig: return &task.ApplyRBACProxyConfigParams{NodeName: node.Name, Namespace: node.Namespace} - case task.TaskTypeApplySidecarCert: - return &task.ApplySidecarCertParams{NodeName: node.Name, Namespace: node.Namespace} case task.TaskTypeReplacePod: return &task.ReplacePodParams{NodeName: node.Name, Namespace: node.Namespace} case task.TaskTypeObserveImage: diff --git a/internal/planner/sidecar_tls_test.go b/internal/planner/sidecar_tls_test.go index 9496424..954b744 100644 --- a/internal/planner/sidecar_tls_test.go +++ b/internal/planner/sidecar_tls_test.go @@ -4,6 +4,8 @@ import ( "fmt" "testing" + . "github.com/onsi/gomega" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" @@ -25,7 +27,7 @@ func TestSidecarURLForNode_TLSRoutesThroughProxyHTTPS(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: "sei"}, Spec: seiv1alpha1.SeiNodeSpec{Sidecar: &seiv1alpha1.SidecarConfig{ - TLS: &seiv1alpha1.SidecarTLSSpec{IssuerName: "ca", IssuerKind: "ClusterIssuer"}, + TLS: &seiv1alpha1.SidecarTLSSpec{SecretName: testValidatorName + "-tls"}, }}, } got := SidecarURLForNode(node) @@ -40,7 +42,7 @@ const ( testSeidImage = "seid:v6.4.1" ) -func TestValidatorPlanner_BuildPlan_NoSidecarTLSTasksByDefault(t *testing.T) { +func TestValidatorPlanner_BuildPlan_NoRBACProxyConfigTaskByDefault(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, Spec: seiv1alpha1.SeiNodeSpec{ @@ -53,10 +55,6 @@ func TestValidatorPlanner_BuildPlan_NoSidecarTLSTasksByDefault(t *testing.T) { if err != nil { t.Fatalf("BuildPlan: %v", err) } - if idx := indexOfTaskType(plan, task.TaskTypeApplySidecarCert); idx >= 0 { - t.Fatalf("plan must not contain %s without spec.sidecar.tls; got %v", - task.TaskTypeApplySidecarCert, taskTypes(plan)) - } if idx := indexOfTaskType(plan, task.TaskTypeApplyRBACProxyConfig); idx >= 0 { t.Fatalf("plan must not contain %s without spec.sidecar.tls; got %v", task.TaskTypeApplyRBACProxyConfig, taskTypes(plan)) @@ -64,13 +62,10 @@ func TestValidatorPlanner_BuildPlan_NoSidecarTLSTasksByDefault(t *testing.T) { } func tlsSpec() *seiv1alpha1.SidecarTLSSpec { - return &seiv1alpha1.SidecarTLSSpec{ - IssuerName: "validator-ca", - IssuerKind: "ClusterIssuer", - } + return &seiv1alpha1.SidecarTLSSpec{SecretName: testValidatorName + "-tls"} } -func TestValidatorPlanner_BuildPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t *testing.T) { +func TestValidatorPlanner_BuildPlan_RBACProxyConfigSequencedBeforeStatefulSet(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, Spec: seiv1alpha1.SeiNodeSpec{ @@ -84,31 +79,31 @@ func TestValidatorPlanner_BuildPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t if err != nil { t.Fatalf("BuildPlan: %v", err) } - assertTLSTasksBeforeStatefulSet(t, plan) + assertRBACProxyConfigBeforeStatefulSet(t, plan) } -// assertTLSTasksBeforeStatefulSet covers the invariant that the TLS -// side-resources (Certificate, ConfigMap) are emitted before the -// StatefulSet apply. The pod-spec includes the rbac-proxy ConfigMap -// + TLS Secret as required volumes; if those don't exist the pod -// stays Pending. Regression-locks the cursor-bot HIGH from PR #223. -func assertTLSTasksBeforeStatefulSet(t *testing.T, plan *seiv1alpha1.TaskPlan) { +// assertRBACProxyConfigBeforeStatefulSet locks the invariant that the +// kube-rbac-proxy authz ConfigMap is emitted before the StatefulSet +// apply. The pod-spec mounts the ConfigMap; if it doesn't exist the pod +// stays Pending. (Under the externalized-Secret design the TLS material +// itself is operator-provisioned and gated via the +// SidecarTLSSecretReady condition; only the authz ConfigMap is in-plan.) +func assertRBACProxyConfigBeforeStatefulSet(t *testing.T, plan *seiv1alpha1.TaskPlan) { t.Helper() - certIdx := indexOfTaskType(plan, task.TaskTypeApplySidecarCert) cfgIdx := indexOfTaskType(plan, task.TaskTypeApplyRBACProxyConfig) stsIdx := indexOfTaskType(plan, task.TaskTypeApplyStatefulSet) - if certIdx < 0 || cfgIdx < 0 { - t.Fatalf("plan must contain both %s and %s; got %v", - task.TaskTypeApplySidecarCert, task.TaskTypeApplyRBACProxyConfig, taskTypes(plan)) + if cfgIdx < 0 { + t.Fatalf("plan must contain %s; got %v", + task.TaskTypeApplyRBACProxyConfig, taskTypes(plan)) } - if certIdx >= stsIdx || cfgIdx >= stsIdx { - t.Fatalf("expected sidecar-cert(%d), rbac-proxy-config(%d) < apply-statefulset(%d); got %v", - certIdx, cfgIdx, stsIdx, taskTypes(plan)) + if cfgIdx >= stsIdx { + t.Fatalf("expected rbac-proxy-config(%d) < apply-statefulset(%d); got %v", + cfgIdx, stsIdx, taskTypes(plan)) } } -func TestBuildGenesisPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t *testing.T) { +func TestBuildGenesisPlan_RBACProxyConfigSequencedBeforeStatefulSet(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, Spec: seiv1alpha1.SeiNodeSpec{ @@ -122,5 +117,82 @@ func TestBuildGenesisPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t *testing.T if err != nil { t.Fatalf("buildGenesisPlan: %v", err) } - assertTLSTasksBeforeStatefulSet(t, plan) + assertRBACProxyConfigBeforeStatefulSet(t, plan) +} + +// --- ResolvePlan TLS gating tests (locks e3fd5fc behavior) --- + +func tlsGatedNode() *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: sourceChainID, + Image: testSeidImage, + Validator: &seiv1alpha1.ValidatorSpec{}, + Sidecar: &seiv1alpha1.SidecarConfig{TLS: tlsSpec()}, + }, + } +} + +func setTLSSecretReady(node *seiv1alpha1.SeiNode, status metav1.ConditionStatus, reason string) { + apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionSidecarTLSSecretReady, Status: status, Reason: reason, + }) +} + +func TestResolvePlan_TLSGate_BlocksWhenConditionFalse(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + setTLSSecretReady(node, metav1.ConditionFalse, seiv1alpha1.ReasonTLSSecretNotFound) + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).To(BeNil(), "plan must not be built when TLS Secret is not ready") +} + +func TestResolvePlan_TLSGate_BlocksWhenConditionAbsent(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + // Condition never set — gate treats missing == false. + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).To(BeNil(), "plan must not be built when TLS condition is absent") +} + +func TestResolvePlan_TLSGate_AllowsWhenConditionTrue(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + setTLSSecretReady(node, metav1.ConditionTrue, seiv1alpha1.ReasonTLSSecretReady) + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).NotTo(BeNil(), "plan should build when TLS Secret is Ready") +} + +func TestResolvePlan_TLSGate_NotEvaluatedWhenTLSDisabled(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + node.Spec.Sidecar = nil // TLS not enabled — gate is bypassed regardless of condition state + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).NotTo(BeNil(), "plan should build for TLS-disabled SeiNode") +} + +func TestResolvePlan_TLSGate_BlocksImageDrift(t *testing.T) { + // Regression for the pre-e3fd5fc carve-out: image drift used to bypass + // the gate. With the carve-out dropped, image drift on a Running node + // with broken TLS must also be blocked. + g := NewWithT(t) + node := tlsGatedNode() + node.Status.Phase = seiv1alpha1.PhaseRunning + node.Status.CurrentImage = "old" + node.Spec.Image = "new" // image drift + setTLSSecretReady(node, metav1.ConditionFalse, seiv1alpha1.ReasonTLSSecretSANsMismatch) + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).To(BeNil(), + "image-drift NodeUpdate plan must not build while TLS Secret is broken — a cycled pod would mount the broken cert") } diff --git a/internal/task/apply_sidecar_cert.go b/internal/task/apply_sidecar_cert.go deleted file mode 100644 index d2ef3e1..0000000 --- a/internal/task/apply_sidecar_cert.go +++ /dev/null @@ -1,70 +0,0 @@ -package task - -import ( - "context" - "encoding/json" - "fmt" - - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" - "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" -) - -const TaskTypeApplySidecarCert = "apply-sidecar-cert" - -type ApplySidecarCertParams struct { - NodeName string `json:"nodeName"` - Namespace string `json:"namespace"` -} - -type applySidecarCertExecution struct { - taskBase - params ApplySidecarCertParams - cfg ExecutionConfig -} - -func deserializeApplySidecarCert(id string, params json.RawMessage, cfg ExecutionConfig) (TaskExecution, error) { - var p ApplySidecarCertParams - if len(params) > 0 { - if err := json.Unmarshal(params, &p); err != nil { - return nil, fmt.Errorf("deserializing apply-sidecar-cert params: %w", err) - } - } - return &applySidecarCertExecution{ - taskBase: taskBase{id: id, status: ExecutionRunning}, - params: p, - cfg: cfg, - }, nil -} - -func (e *applySidecarCertExecution) Execute(ctx context.Context) error { - node, err := ResourceAs[*seiv1alpha1.SeiNode](e.cfg) - if err != nil { - return err - } - - desired := noderesource.GenerateSidecarCertificate(node) - if desired == nil { - e.complete() - return nil - } - if err := ctrl.SetControllerReference(node, desired, e.cfg.Scheme); err != nil { - return fmt.Errorf("setting owner reference on sidecar Certificate: %w", err) - } - - // ForceOwnership: our reconcile wins on field conflicts. cert-manager - // owns .status (separate field manager); we set only .spec. - //nolint:staticcheck // unstructured Patch + Apply is the documented path for non-typed CRDs - if err := e.cfg.KubeClient.Patch(ctx, desired, client.Apply, fieldOwner, client.ForceOwnership); err != nil { - return fmt.Errorf("applying sidecar Certificate: %w", err) - } - - e.complete() - return nil -} - -func (e *applySidecarCertExecution) Status(_ context.Context) ExecutionStatus { - return e.DefaultStatus() -} diff --git a/internal/task/task.go b/internal/task/task.go index cc03c27..ff3b7ef 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -219,7 +219,6 @@ var registry = map[string]taskDeserializer{ TaskTypeApplyStatefulSet: deserializeApplyStatefulSet, TaskTypeApplyService: deserializeApplyService, TaskTypeApplyRBACProxyConfig: deserializeApplyRBACProxyConfig, - TaskTypeApplySidecarCert: deserializeApplySidecarCert, TaskTypeReplacePod: deserializeReplacePod, TaskTypeObserveImage: deserializeObserveImage, TaskTypeValidateSigningKey: deserializeValidateSigningKey, diff --git a/manifests/role.yaml b/manifests/role.yaml index 108e18d..025cbd1 100644 --- a/manifests/role.yaml +++ b/manifests/role.yaml @@ -67,17 +67,6 @@ rules: - patch - update - watch -- apiGroups: - - cert-manager.io - resources: - - certificates - verbs: - - create - - get - - list - - patch - - update - - watch - apiGroups: - gateway.networking.k8s.io resources: diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index 329ad8d..671c58f 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -611,25 +611,21 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or - "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -921,6 +917,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the + SeiNode to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) + ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) + && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)' required: - spec type: object diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index dad2a1c..421e1c4 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -466,24 +466,21 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -769,6 +766,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode + to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) + || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) + && self.sidecar.tls == oldSelf.sidecar.tls)' status: description: SeiNodeStatus defines the observed state of a SeiNode. properties: @@ -996,6 +998,22 @@ spec: items: type: string type: array + sidecarTLS: + description: SidecarTLS is set when spec.sidecar.tls is non-nil. + properties: + requiredDNSNames: + description: RequiredDNSNames is the SAN list the cert in SecretName + must include. + items: + type: string + type: array + secretName: + description: SecretName mirrors spec.sidecar.tls.secretName. + type: string + required: + - requiredDNSNames + - secretName + type: object type: object type: object served: true