From 5ca3f6e36de1274e8183907274e7b89dc2df7f0d Mon Sep 17 00:00:00 2001
From: Per Goncalves da Silva
Date: Wed, 11 Feb 2026 14:46:21 +0100
Subject: [PATCH] Call RevisionEngine.Teardown when CER is archived

When a ClusterExtensionRevision transitions to the Archived lifecycle
state, invoke the revision engine's Teardown method to clean up managed
resources that are no longer part of the active revision. This ensures
resources removed between bundle versions (e.g. a ConfigMap present in
v1.0.0 but absent in v1.2.0) are deleted from the cluster.

Changes:
- Pass the boxcutter Revision into teardown() so the teardown path can
  operate on the revision's objects
- Create a RevisionEngine via the RevisionEngineFactory in the archived
  teardown path and call revisionEngine.Teardown(), handling incomplete
  teardown (requeue after 5s) and errors (return error for controller
  retry)
- Only mark the revision as Archived once teardown has completed
- Move the revVersion annotation lookup next to its only use in the
  reconcile path
- Add unit tests for incomplete teardown requeue, teardown error
  propagation, and revision engine creation failure
  (teardown coverage: 68.8% -> 93.8%)
- Add dummy-configmap to v1.0.0 test bundle (absent in v1.2.0) and assert
  it is removed in the "Each update creates a new revision" e2e test

Signed-off-by: Per Goncalves da Silva
---
 .../clusterextensionrevision_controller.go    |  39 +++++--
 ...lusterextensionrevision_controller_test.go | 110 +++++++++++++++++-
 test/e2e/features/update.feature              |   4 +-
 .../v1.0.0/manifests/dummy.configmap.yaml     |   6 +
 4 files changed, 145 insertions(+), 14 deletions(-)
 create mode 100644 testdata/images/bundles/test-operator/v1.0.0/manifests/dummy.configmap.yaml

diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go
index 138bcd8eba..4c43f8f12b 100644
--- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go
+++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go
@@ -140,11 +140,13 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
 		return ctrl.Result{}, fmt.Errorf("converting to boxcutter revision: %v", err)
 	}
 
+	//
+	// Teardown
+	//
 	if !rev.DeletionTimestamp.IsZero() ||
 		rev.Spec.LifecycleState == ocv1.ClusterExtensionRevisionLifecycleStateArchived {
-		return c.teardown(ctx, rev)
+		return c.teardown(ctx, revision, rev)
 	}
-	revVersion := rev.GetAnnotations()[labels.BundleVersionKey]
 	//
 	// Reconcile
 	//
@@ -203,6 +205,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
 		}
 	}
 
+	revVersion := rev.GetAnnotations()[labels.BundleVersionKey]
 	if !rres.InTransition() {
 		markAsProgressing(rev, ocv1.ReasonSucceeded, fmt.Sprintf("Revision %s has rolled out.", revVersion))
 	} else {
@@ -275,18 +278,32 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
 	return ctrl.Result{}, nil
 }
-func (c *ClusterExtensionRevisionReconciler) teardown(ctx context.Context, rev *ocv1.ClusterExtensionRevision) (ctrl.Result, error) {
-	if err := c.TrackingCache.Free(ctx, rev); err != nil {
-		markAsAvailableUnknown(rev, ocv1.ClusterExtensionRevisionReasonReconciling, err.Error())
+func (c *ClusterExtensionRevisionReconciler) teardown(ctx context.Context, revision *boxcutter.Revision, cer *ocv1.ClusterExtensionRevision) (ctrl.Result, error) {
+	if err := c.TrackingCache.Free(ctx, cer); err != nil {
+		markAsAvailableUnknown(cer, ocv1.ClusterExtensionRevisionReasonReconciling, err.Error())
 		return ctrl.Result{}, fmt.Errorf("error stopping informers: %v", err)
 	}
-
-	// Ensure conditions are set before removing the finalizer when archiving
-	if rev.Spec.LifecycleState == ocv1.ClusterExtensionRevisionLifecycleStateArchived && markAsArchived(rev) {
-		return ctrl.Result{}, nil
+	if cer.Spec.LifecycleState == ocv1.ClusterExtensionRevisionLifecycleStateArchived {
+		revisionEngine, err := c.RevisionEngineFactory.CreateRevisionEngine(ctx, cer)
+		if err != nil {
+			setRetryingConditions(cer, fmt.Sprintf("error archiving: %v", err))
+			return ctrl.Result{}, fmt.Errorf("failed to create revision engine: %v", err)
+		}
+		tdres, err := revisionEngine.Teardown(ctx, *revision)
+		if err != nil {
+			setRetryingConditions(cer, fmt.Sprintf("error archiving: %v", err))
+			return ctrl.Result{}, fmt.Errorf("error tearing down revision: %v", err)
+		}
+		if tdres != nil && !tdres.IsComplete() {
+			setRetryingConditions(cer, "tearing down revision")
+			return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
+		}
+		// Ensure conditions are set before removing the finalizer when archiving
+		if markAsArchived(cer) {
+			return ctrl.Result{}, nil
+		}
 	}
-
-	if err := c.removeFinalizer(ctx, rev, clusterExtensionRevisionTeardownFinalizer); err != nil {
+	if err := c.removeFinalizer(ctx, cer, clusterExtensionRevisionTeardownFinalizer); err != nil {
 		return ctrl.Result{}, fmt.Errorf("error removing teardown finalizer: %v", err)
 	}
 	return ctrl.Result{}, nil
 }
diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go b/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go
index f456976f43..6e3aa7e68c 100644
--- a/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go
+++ b/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go
@@ -641,9 +641,11 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) {
 		existingObjs             func() []client.Object
 		revisionResult           machinery.RevisionResult
 		revisionEngineTeardownFn func(*testing.T) func(context.Context, machinerytypes.Revision, ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error)
+		revisionEngineFactoryErr error
 		validate                 func(*testing.T, client.Client)
 		trackingCacheFreeFn      func(context.Context, client.Object) error
 		expectedErr              string
+		expectedResult           ctrl.Result
 	}{
 		{
 			name: "teardown finalizer is removed",
@@ -775,6 +777,109 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) {
 				require.Equal(t, int64(1), cond.ObservedGeneration)
 			},
 		},
+		{
+			name:           "set Progressing:True:Retrying and requeue when archived revision teardown is incomplete",
+			revisionResult: mockRevisionResult{},
+			existingObjs: func() []client.Object {
+				ext := newTestClusterExtension()
+				rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme)
+				rev1.Finalizers = []string{
+					"olm.operatorframework.io/teardown",
+				}
+				rev1.Spec.LifecycleState = ocv1.ClusterExtensionRevisionLifecycleStateArchived
+				return []client.Object{rev1, ext}
+			},
+			revisionEngineTeardownFn: func(t *testing.T) func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error) {
+				return func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error) {
+					return &mockRevisionTeardownResult{
+						isComplete: false,
+					}, nil
+				}
+			},
+			expectedResult: ctrl.Result{RequeueAfter: 5 * time.Second},
+			validate: func(t *testing.T, c client.Client) {
+				rev := &ocv1.ClusterExtensionRevision{}
+				err := c.Get(t.Context(), client.ObjectKey{
+					Name: clusterExtensionRevisionName,
+				}, rev)
+				require.NoError(t, err)
+				cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
+				require.NotNil(t, cond)
+				require.Equal(t, metav1.ConditionTrue, cond.Status)
+				require.Equal(t, ocv1.ClusterExtensionRevisionReasonRetrying, cond.Reason)
+				require.Equal(t, "tearing down revision", cond.Message)
+
+				// Finalizer should still be present
+				require.Contains(t, rev.Finalizers, "olm.operatorframework.io/teardown")
+			},
+		},
+		{
+			name:           "return error and set retrying conditions when archived revision teardown fails",
+			revisionResult: mockRevisionResult{},
+			existingObjs: func() []client.Object {
+				ext := newTestClusterExtension()
+				rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme)
+				rev1.Finalizers = []string{
+					"olm.operatorframework.io/teardown",
+				}
+				rev1.Spec.LifecycleState = ocv1.ClusterExtensionRevisionLifecycleStateArchived
+				return []client.Object{rev1, ext}
+			},
+			revisionEngineTeardownFn: func(t *testing.T) func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error) {
+				return func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error) {
+					return nil, fmt.Errorf("teardown failed: connection refused")
+				}
+			},
+			expectedErr: "error tearing down revision",
+			validate: func(t *testing.T, c client.Client) {
+				rev := &ocv1.ClusterExtensionRevision{}
+				err := c.Get(t.Context(), client.ObjectKey{
+					Name: clusterExtensionRevisionName,
+				}, rev)
+				require.NoError(t, err)
+				cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
+				require.NotNil(t, cond)
+				require.Equal(t, metav1.ConditionTrue, cond.Status)
+				require.Equal(t, ocv1.ClusterExtensionRevisionReasonRetrying, cond.Reason)
+				require.Contains(t, cond.Message, "teardown failed: connection refused")
+
+				// Finalizer should still be present
+				require.Contains(t, rev.Finalizers, "olm.operatorframework.io/teardown")
+			},
+		},
+		{
+			name:           "return error and set retrying conditions when factory fails to create engine during archived teardown",
+			revisionResult: mockRevisionResult{},
+			existingObjs: func() []client.Object {
+				ext := newTestClusterExtension()
+				rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme)
+				rev1.Finalizers = []string{
+					"olm.operatorframework.io/teardown",
+				}
+				rev1.Spec.LifecycleState = ocv1.ClusterExtensionRevisionLifecycleStateArchived
+				return []client.Object{rev1, ext}
+			},
+			revisionEngineTeardownFn: func(t *testing.T) func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error) {
+				return nil
+			},
+			revisionEngineFactoryErr: fmt.Errorf("token getter failed"),
+			expectedErr:              "failed to create revision engine",
+			validate: func(t *testing.T, c client.Client) {
+				rev := &ocv1.ClusterExtensionRevision{}
+				err := c.Get(t.Context(), client.ObjectKey{
+					Name: clusterExtensionRevisionName,
+				}, rev)
+				require.NoError(t, err)
+				cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
+				require.NotNil(t, cond)
+				require.Equal(t, metav1.ConditionTrue, cond.Status)
+				require.Equal(t, ocv1.ClusterExtensionRevisionReasonRetrying, cond.Reason)
+				require.Contains(t, cond.Message, "token getter failed")
+
+				// Finalizer should still be present
+				require.Contains(t, rev.Finalizers, "olm.operatorframework.io/teardown")
+			},
+		},
 		{
 			name:           "revision is torn down when in archived state and finalizer is removed",
 			revisionResult: mockRevisionResult{},
@@ -833,9 +938,10 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) {
 				},
 				teardown: tc.revisionEngineTeardownFn(t),
 			}
+			factory := &mockRevisionEngineFactory{engine: mockEngine, createErr: tc.revisionEngineFactoryErr}
 			result, err := (&controllers.ClusterExtensionRevisionReconciler{
 				Client:                testClient,
-				RevisionEngineFactory: &mockRevisionEngineFactory{engine: mockEngine},
+				RevisionEngineFactory: factory,
 				TrackingCache: &mockTrackingCache{
 					client: testClient,
 					freeFn: tc.trackingCacheFreeFn,
@@ -847,7 +953,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) {
 			})
 
 			// reconcile cluster extension revision
-			require.Equal(t, ctrl.Result{}, result)
+			require.Equal(t, tc.expectedResult, result)
 			if tc.expectedErr != "" {
 				require.Contains(t, err.Error(), tc.expectedErr)
 			} else {
diff --git a/test/e2e/features/update.feature b/test/e2e/features/update.feature
index dee45e32a9..782820c2fd 100644
--- a/test/e2e/features/update.feature
+++ b/test/e2e/features/update.feature
@@ -181,7 +181,7 @@ Feature: Update ClusterExtension
     Then bundle "test-operator.1.3.0" is installed in version "1.3.0"
 
   @BoxcutterRuntime
-  Scenario: Each update creates a new revision
+  Scenario: Each update creates a new revision and resources not present in the new revision are removed from the cluster
     Given ClusterExtension is applied
     """
     apiVersion: olm.operatorframework.io/v1
@@ -212,6 +212,8 @@ Feature: Update ClusterExtension
     And ClusterExtensionRevision "${NAME}-2" reports Progressing as True with Reason Succeeded
     And ClusterExtensionRevision "${NAME}-2" reports Available as True with Reason ProbesSucceeded
     And ClusterExtensionRevision "${NAME}-1" is archived
+    # dummy-configmap exists only in v1.0.0 - once the v1.0.0 revision is archived, it should be gone from the cluster
+    And resource "configmap/dummy-configmap" is eventually not found
 
   @BoxcutterRuntime
   Scenario: Report all active revisions on ClusterExtension
diff --git a/testdata/images/bundles/test-operator/v1.0.0/manifests/dummy.configmap.yaml b/testdata/images/bundles/test-operator/v1.0.0/manifests/dummy.configmap.yaml
new file mode 100644
index 0000000000..8135b6f178
--- /dev/null
+++ b/testdata/images/bundles/test-operator/v1.0.0/manifests/dummy.configmap.yaml
@@ -0,0 +1,6 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: dummy-configmap
+data:
+  why: "this config map does not exist in higher versions of the bundle - it is useful to test whether resources removed between versions are removed from the cluster as well"
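
Note for reviewers: the new test cases rely on test doubles that live in the existing test file rather than in this diff, notably a mockRevisionEngineFactory honouring a createErr field and a mockRevisionTeardownResult carrying an isComplete flag. The sketch below is only an illustration of the shape those helpers are assumed to have, inferred from how the tests above use them; the actual definitions (type and field names, embedded interfaces, remaining methods, and the engine type returned by CreateRevisionEngine) may differ and should be checked against clusterextensionrevision_controller_test.go.

// Illustrative sketch only - not part of this patch. Assumes the test
// package's existing imports (context, machinery, ocv1) and an engine
// interface, here called RevisionEngine, returned by the factory.
type mockRevisionTeardownResult struct {
	machinery.RevisionTeardownResult // embedded so unimplemented methods fall through (and panic if called)
	isComplete bool
}

// IsComplete is the only method the archived-teardown path inspects.
func (m *mockRevisionTeardownResult) IsComplete() bool { return m.isComplete }

type mockRevisionEngineFactory struct {
	engine    RevisionEngine // assumed engine interface; name is a placeholder
	createErr error
}

// CreateRevisionEngine returns the configured error first, which is what lets
// the "factory fails to create engine" case exercise the
// "failed to create revision engine" branch in teardown().
func (f *mockRevisionEngineFactory) CreateRevisionEngine(ctx context.Context, cer *ocv1.ClusterExtensionRevision) (RevisionEngine, error) {
	if f.createErr != nil {
		return nil, f.createErr
	}
	return f.engine, nil
}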