From 6d40acce1f8121c7ce6eedac1dd4adb3555a0ac3 Mon Sep 17 00:00:00 2001 From: Per Goncalves da Silva Date: Thu, 5 Feb 2026 15:17:59 +0100 Subject: [PATCH] feat: Add validation framework with ServiceAccount validator to ClusterExtension Introduces an extensible validation framework that runs early in the ClusterExtension reconciliation pipeline to catch configuration errors before expensive operations begin. The first validator checks whether the specified ServiceAccount exists, providing clear feedback when it's missing. The validation step: - Executes all validators and collects all errors (no fail-fast) - Reuses the same TokenGetter cache for both validation and bundle operations - Sets appropriate status conditions (Installed=Unknown, Progressing=Retrying) - Provides user-friendly error messages This architectural improvement separates validation concerns from revision state retrieval, making it easier to add new validators in the future. Co-Authored-By: Claude Sonnet 4.5 Signed-off-by: Per Goncalves da Silva --- cmd/operator-controller/main.go | 15 +- .../clusterextension_controller_test.go | 240 ++++++++++++++---- .../clusterextension_reconcile_steps.go | 64 ++++- .../controllers/suite_test.go | 7 +- test/e2e/features/install.feature | 24 ++ 5 files changed, 292 insertions(+), 58 deletions(-) diff --git a/cmd/operator-controller/main.go b/cmd/operator-controller/main.go index dd0bc98f6..286260e24 100644 --- a/cmd/operator-controller/main.go +++ b/cmd/operator-controller/main.go @@ -625,8 +625,12 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl ActionClientGetter: acg, RevisionGenerator: rg, } + tokenGetter := authentication.NewTokenGetter(coreClient, authentication.WithExpirationDuration(1*time.Hour)) ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{ controllers.HandleFinalizers(c.finalizers), + controllers.ValidateClusterExtension( + controllers.ServiceAccountValidator(tokenGetter), + ), controllers.MigrateStorage(storageMigrator), controllers.RetrieveRevisionStates(revisionStatesGetter), controllers.ResolveBundle(c.resolver, c.mgr.GetClient()), @@ -656,12 +660,6 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl return fmt.Errorf("unable to add tracking cache to manager: %v", err) } - cerCoreClient, err := corev1client.NewForConfig(c.mgr.GetConfig()) - if err != nil { - return fmt.Errorf("unable to create client for ClusterExtensionRevision controller: %w", err) - } - cerTokenGetter := authentication.NewTokenGetter(cerCoreClient, authentication.WithExpirationDuration(1*time.Hour)) - revisionEngineFactory, err := controllers.NewDefaultRevisionEngineFactory( c.mgr.GetScheme(), trackingCache, @@ -669,7 +667,7 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl c.mgr.GetRESTMapper(), fieldOwnerPrefix, c.mgr.GetConfig(), - cerTokenGetter, + tokenGetter, ) if err != nil { return fmt.Errorf("unable to create revision engine factory: %w", err) @@ -747,6 +745,9 @@ func (c *helmReconcilerConfigurator) Configure(ceReconciler *controllers.Cluster revisionStatesGetter := &controllers.HelmRevisionStatesGetter{ActionClientGetter: acg} ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{ controllers.HandleFinalizers(c.finalizers), + controllers.ValidateClusterExtension( + controllers.ServiceAccountValidator(tokenGetter), + ), controllers.RetrieveRevisionStates(revisionStatesGetter), controllers.ResolveBundle(c.resolver, c.mgr.GetClient()), controllers.UnpackBundle(c.imagePuller, c.imageCache), diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index 28d766ace..ffe51667a 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -6,6 +6,7 @@ import ( "fmt" "testing" "testing/fstest" + "time" bsemver "github.com/blang/semver/v4" "github.com/google/go-cmp/cmp" @@ -15,11 +16,16 @@ import ( "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/release" "helm.sh/helm/v3/pkg/storage/driver" + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/client-go/kubernetes/fake" + clientgotesting "k8s.io/client-go/testing" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" crfinalizer "sigs.k8s.io/controller-runtime/pkg/finalizer" @@ -767,60 +773,177 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) } -func TestClusterExtensionServiceAccountNotFound(t *testing.T) { - cl, reconciler := newClientAndReconciler(t, func(d *deps) { - d.RevisionStatesGetter = &MockRevisionStatesGetter{ - Err: &authentication.ServiceAccountNotFoundError{ - ServiceAccountName: "missing-sa", - ServiceAccountNamespace: "default", - }} - }) - - ctx := context.Background() - extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} - - t.Log("Given a cluster extension with a missing service account") - clusterExtension := &ocv1.ClusterExtension{ - ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, - Spec: ocv1.ClusterExtensionSpec{ - Source: ocv1.SourceConfig{ - SourceType: "Catalog", - Catalog: &ocv1.CatalogFilter{ - PackageName: "test-package", +func TestValidateClusterExtension(t *testing.T) { + tests := []struct { + name string + validators []controllers.ClusterExtensionValidator + expectError bool + errorMessageIncludes string + }{ + { + name: "all validators pass", + validators: []controllers.ClusterExtensionValidator{ + // Validator that always passes + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return nil }, }, - Namespace: "default", - ServiceAccount: ocv1.ServiceAccountReference{ - Name: "missing-sa", + expectError: false, + }, + { + name: "validator fails - sets Progressing condition", + validators: []controllers.ClusterExtensionValidator{ + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return errors.New("generic validation error") + }, + }, + expectError: true, + errorMessageIncludes: "generic validation error", + }, + { + name: "multiple validators - collects all failures", + validators: []controllers.ClusterExtensionValidator{ + // First validator fails + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return errors.New("first validator failed") + }, + // Second validator also fails + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return errors.New("second validator failed") + }, + // Third validator fails too + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return errors.New("third validator failed") + }, + }, + expectError: true, + errorMessageIncludes: "first validator failed\nsecond validator failed\nthird validator failed", + }, + { + name: "multiple validators - all pass", + validators: []controllers.ClusterExtensionValidator{ + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return nil + }, + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return nil + }, + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return nil + }, + }, + expectError: false, + }, + { + name: "multiple validators - some pass, some fail", + validators: []controllers.ClusterExtensionValidator{ + // First validator passes + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return nil + }, + // Second validator fails + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return errors.New("validation error 1") + }, + // Third validator passes + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return nil + }, + // Fourth validator fails + func(_ context.Context, _ *ocv1.ClusterExtension) error { + return errors.New("validation error 2") + }, + }, + expectError: true, + errorMessageIncludes: "validation error 1\nvalidation error 2", + }, + { + name: "service account not found", + validators: []controllers.ClusterExtensionValidator{ + serviceAccountValidatorWithFakeClient(&corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "missing-sa", + Namespace: "test-ns", + }, + }), + }, + expectError: true, + errorMessageIncludes: `service account "test-sa" not found in namespace "test-ns"`, + }, + { + name: "service account found", + validators: []controllers.ClusterExtensionValidator{ + serviceAccountValidatorWithFakeClient(&corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sa", + Namespace: "test-ns", + }, + }), }, + expectError: false, }, } - require.NoError(t, cl.Create(ctx, clusterExtension)) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() - t.Log("When reconciling the cluster extension") - res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{}, + } + d.Validators = tt.validators + }) - require.Equal(t, ctrl.Result{}, res) - require.Error(t, err) - var saErr *authentication.ServiceAccountNotFoundError - require.ErrorAs(t, err, &saErr) - t.Log("By fetching updated cluster extension after reconcile") - require.NoError(t, cl.Get(ctx, extKey, clusterExtension)) + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} - t.Log("By checking the status conditions") - installedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled) - require.NotNil(t, installedCond) - require.Equal(t, metav1.ConditionUnknown, installedCond.Status) - require.Contains(t, installedCond.Message, fmt.Sprintf("service account %q not found in namespace %q: unable to authenticate with the Kubernetes cluster.", - "missing-sa", "default")) + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "test-package", + }, + }, + Namespace: "test-ns", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "test-sa", + }, + }, + } - progressingCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeProgressing) - require.NotNil(t, progressingCond) - require.Equal(t, metav1.ConditionTrue, progressingCond.Status) - require.Equal(t, ocv1.ReasonRetrying, progressingCond.Reason) - require.Contains(t, progressingCond.Message, "installation cannot proceed due to missing ServiceAccount") - require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) + require.NoError(t, cl.Create(ctx, clusterExtension)) + + t.Log("When reconciling the cluster extension") + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.Equal(t, ctrl.Result{}, res) + if tt.expectError { + require.Error(t, err) + t.Log("By fetching updated cluster extension after reconcile") + require.NoError(t, cl.Get(ctx, extKey, clusterExtension)) + + t.Log("By checking the status conditions") + installedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, installedCond) + require.Equal(t, metav1.ConditionUnknown, installedCond.Status) + require.Contains(t, installedCond.Message, "installation cannot proceed due to the following validation error(s)") + require.Contains(t, installedCond.Message, tt.errorMessageIncludes) + + progressingCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeProgressing) + require.NotNil(t, progressingCond) + require.Equal(t, metav1.ConditionTrue, progressingCond.Status) + require.Equal(t, ocv1.ReasonRetrying, progressingCond.Reason) + require.Contains(t, progressingCond.Message, "installation cannot proceed due to the following validation error(s)") + require.Contains(t, progressingCond.Message, tt.errorMessageIncludes) + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) + } else { + require.NoError(t, err) + require.NoError(t, cl.Get(ctx, extKey, clusterExtension)) + require.Empty(t, clusterExtension.Status.Conditions) + } + }) + } } func TestClusterExtensionApplierFailsWithBundleInstalled(t *testing.T) { @@ -2878,3 +3001,32 @@ func TestCheckCatalogsExist(t *testing.T) { require.False(t, exists) }) } + +func serviceAccountValidatorWithFakeClient(serviceAccount *corev1.ServiceAccount) controllers.ClusterExtensionValidator { + if serviceAccount == nil { + return controllers.ServiceAccountValidator(authentication.NewTokenGetter(fake.NewClientset().CoreV1())) + } + fakeClientset := fake.NewClientset(serviceAccount) + fakeClientset.PrependReactor("create", "serviceaccounts", func(action clientgotesting.Action) (bool, runtime.Object, error) { + if action.GetSubresource() != "token" || action.GetNamespace() != serviceAccount.Namespace { + // Let other reactors handle standard SA creates + return false, nil, nil + } + createAction, ok := action.(clientgotesting.CreateActionImpl) + if !ok { + return false, nil, fmt.Errorf("unexpected action: expected CreateActionImpl but got %#v", action) + } + if createAction.GetNamespace() != serviceAccount.Namespace || createAction.Name != serviceAccount.Name { + return false, nil, nil + } + tr := &authenticationv1.TokenRequest{ + Status: authenticationv1.TokenRequestStatus{ + Token: "fake-jwt-token-string", + ExpirationTimestamp: metav1.NewTime(time.Now().Add(1 * time.Hour)), + }, + } + return true, tr, nil + }) + tokenGetter := authentication.NewTokenGetter(fakeClientset.CoreV1()) + return controllers.ServiceAccountValidator(tokenGetter) +} diff --git a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go index 16d691f8b..34976bb27 100644 --- a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go +++ b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go @@ -23,6 +23,7 @@ import ( apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/finalizer" @@ -63,6 +64,63 @@ func HandleFinalizers(f finalizer.Finalizer) ReconcileStepFunc { } } +// ClusterExtensionValidator is a function that validates a ClusterExtension. +// It returns an error if validation fails. Validators are executed sequentially +// in the order they are registered. +type ClusterExtensionValidator func(context.Context, *ocv1.ClusterExtension) error + +// ValidateClusterExtension returns a ReconcileStepFunc that executes all +// validators sequentially. All validators are executed even if some fail, +// and all errors are collected and returned as a joined error. +// This provides complete validation feedback in a single reconciliation cycle. +func ValidateClusterExtension(validators ...ClusterExtensionValidator) ReconcileStepFunc { + return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { + l := log.FromContext(ctx) + + l.Info("validating cluster extension") + var validationErrors []error + for _, validator := range validators { + if err := validator(ctx, ext); err != nil { + validationErrors = append(validationErrors, err) + } + } + + // If there are no validation errors, continue reconciliation + if len(validationErrors) == 0 { + return nil, nil + } + + // Set status condition with the validation error for other validation failures + err := fmt.Errorf("installation cannot proceed due to the following validation error(s): %w", errors.Join(validationErrors...)) + setInstalledStatusConditionUnknown(ext, err.Error()) + setStatusProgressing(ext, err) + return nil, err + } +} + +// ServiceAccountValidator returns a validator that checks if the specified +// ServiceAccount exists in the cluster by using the TokenGetter to fetch a token. +// The TokenGetter maintains an internal cache, so this validation reuses tokens +// instead of creating new ones on every validation attempt. The same token will +// be reused for actual bundle operations. +func ServiceAccountValidator(tokenGetter *authentication.TokenGetter) ClusterExtensionValidator { + return func(ctx context.Context, ext *ocv1.ClusterExtension) error { + saKey := types.NamespacedName{ + Name: ext.Spec.ServiceAccount.Name, + Namespace: ext.Spec.Namespace, + } + _, err := tokenGetter.Get(ctx, saKey) + if err != nil { + var saErr *authentication.ServiceAccountNotFoundError + if errors.As(err, &saErr) { + return fmt.Errorf("service account %q not found in namespace %q", saKey.Name, saKey.Namespace) + } + return fmt.Errorf("error validating service account: %w", err) + } + return nil + } +} + func RetrieveRevisionStates(r RevisionStatesGetter) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) @@ -70,12 +128,6 @@ func RetrieveRevisionStates(r RevisionStatesGetter) ReconcileStepFunc { revisionStates, err := r.GetRevisionStates(ctx, ext) if err != nil { setInstallStatus(ext, nil) - var saerr *authentication.ServiceAccountNotFoundError - if errors.As(err, &saerr) { - setInstalledStatusConditionUnknown(ext, saerr.Error()) - setStatusProgressing(ext, errors.New("installation cannot proceed due to missing ServiceAccount")) - return nil, err - } setInstalledStatusConditionUnknown(ext, err.Error()) setStatusProgressing(ext, errors.New("retrying to get installed bundle")) return nil, err diff --git a/internal/operator-controller/controllers/suite_test.go b/internal/operator-controller/controllers/suite_test.go index 57305a75f..d1630eedc 100644 --- a/internal/operator-controller/controllers/suite_test.go +++ b/internal/operator-controller/controllers/suite_test.go @@ -87,6 +87,7 @@ type deps struct { ImagePuller image.Puller ImageCache image.Cache Applier controllers.Applier + Validators []controllers.ClusterExtensionValidator } func newClientAndReconciler(t *testing.T, opts ...reconcilerOption) (client.Client, *controllers.ClusterExtensionReconciler) { @@ -104,7 +105,11 @@ func newClientAndReconciler(t *testing.T, opts ...reconcilerOption) (client.Clie for _, opt := range opts { opt(d) } - reconciler.ReconcileSteps = []controllers.ReconcileStepFunc{controllers.HandleFinalizers(d.Finalizers), controllers.RetrieveRevisionStates(d.RevisionStatesGetter)} + reconciler.ReconcileSteps = []controllers.ReconcileStepFunc{ + controllers.HandleFinalizers(d.Finalizers), + controllers.ValidateClusterExtension(d.Validators...), + controllers.RetrieveRevisionStates(d.RevisionStatesGetter), + } if r := d.Resolver; r != nil { reconciler.ReconcileSteps = append(reconciler.ReconcileSteps, controllers.ResolveBundle(r, cl)) } diff --git a/test/e2e/features/install.feature b/test/e2e/features/install.feature index 23f75c546..51bebe3a4 100644 --- a/test/e2e/features/install.feature +++ b/test/e2e/features/install.feature @@ -89,6 +89,30 @@ Feature: Install ClusterExtension found bundles for package "test" in multiple catalogs with the same priority [extra-catalog test-catalog] """ + Scenario: Report validation error when ServiceAccount does not exist + When ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: non-existent-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + Then ClusterExtension reports Progressing as True with Reason Retrying and Message includes: + """ + installation cannot proceed due to the following validation error(s): service account "non-existent-sa" not found in namespace "${TEST_NAMESPACE}" + """ + @SingleOwnNamespaceInstallSupport Scenario: watchNamespace config is required for extension supporting single namespace Given ServiceAccount "olm-admin" in test namespace is cluster admin