diff --git a/api/v1/clusterextension_types.go b/api/v1/clusterextension_types.go index 7b0a39ef1..05f3a9364 100644 --- a/api/v1/clusterextension_types.go +++ b/api/v1/clusterextension_types.go @@ -436,16 +436,14 @@ type CRDUpgradeSafetyPreflightConfig struct { } const ( - // TypeDeprecated is a rollup condition that is present when - // any of the deprecated conditions are present. + // Deprecation Condition Types TypeDeprecated = "Deprecated" TypePackageDeprecated = "PackageDeprecated" TypeChannelDeprecated = "ChannelDeprecated" TypeBundleDeprecated = "BundleDeprecated" - // None will not perform CRD upgrade safety checks. - CRDUpgradeSafetyEnforcementNone CRDUpgradeSafetyEnforcement = "None" - // Strict will enforce the CRD upgrade safety check and block the upgrade if the CRD would not pass the check. + // CRD Upgrade Safety Enforcement + CRDUpgradeSafetyEnforcementNone CRDUpgradeSafetyEnforcement = "None" CRDUpgradeSafetyEnforcementStrict CRDUpgradeSafetyEnforcement = "Strict" ) diff --git a/api/v1/clusterextensionrevision_types.go b/api/v1/clusterextensionrevision_types.go index 0f41ab319..ed5ff5bae 100644 --- a/api/v1/clusterextensionrevision_types.go +++ b/api/v1/clusterextensionrevision_types.go @@ -25,17 +25,18 @@ const ( ClusterExtensionRevisionKind = "ClusterExtensionRevision" // Condition Types - ClusterExtensionRevisionTypeAvailable = "Available" - ClusterExtensionRevisionTypeProgressing = "Progressing" - ClusterExtensionRevisionTypeSucceeded = "Succeeded" + ClusterExtensionRevisionTypeReady = "Ready" // Condition Reasons - ClusterExtensionRevisionReasonArchived = "Archived" - ClusterExtensionRevisionReasonBlocked = "Blocked" - ClusterExtensionRevisionReasonProbeFailure = "ProbeFailure" - ClusterExtensionRevisionReasonProbesSucceeded = "ProbesSucceeded" - ClusterExtensionRevisionReasonReconciling = "Reconciling" - ClusterExtensionRevisionReasonRetrying = "Retrying" + ClusterExtensionRevisionReasonReady = "Ready" + ClusterExtensionRevisionReasonReconciling = "Reconciling" + ClusterExtensionRevisionReasonProbeFailure = "ProbeFailure" + ClusterExtensionRevisionReasonValidationFailed = "ValidationFailed" + ClusterExtensionRevisionReasonObjectCollision = "ObjectCollision" + ClusterExtensionRevisionReasonProgressDeadlineExceeded = "ProgressDeadlineExceeded" + ClusterExtensionRevisionReasonArchived = "Archived" + ClusterExtensionRevisionReasonRollingOut = "RollingOut" + ClusterExtensionRevisionReasonTransitioning = "Transitioning" ) // ClusterExtensionRevisionSpec defines the desired state of ClusterExtensionRevision. @@ -87,17 +88,6 @@ type ClusterExtensionRevisionSpec struct { // +listMapKey=name // +optional Phases []ClusterExtensionRevisionPhase `json:"phases,omitempty"` - - // progressDeadlineMinutes is an optional field that defines the maximum period - // of time in minutes after which an installation should be considered failed and - // require manual intervention. This functionality is disabled when no value - // is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). - // - // +kubebuilder:validation:Minimum:=10 - // +kubebuilder:validation:Maximum:=720 - // +optional - // - ProgressDeadlineMinutes int32 `json:"progressDeadlineMinutes,omitempty"` } // ClusterExtensionRevisionLifecycleState specifies the lifecycle state of the ClusterExtensionRevision. @@ -184,39 +174,106 @@ const ( CollisionProtectionNone CollisionProtection = "None" ) +// ClusterExtensionRevisionPhaseState represents the state of a phase during rollout. +type ClusterExtensionRevisionPhaseState string + +const ( + // ClusterExtensionRevisionPhaseStateApplied indicates all objects in the phase have been successfully applied. + ClusterExtensionRevisionPhaseStateApplied ClusterExtensionRevisionPhaseState = "Applied" + // ClusterExtensionRevisionPhaseStateProgressing indicates the phase is actively being reconciled and probes are being evaluated. + ClusterExtensionRevisionPhaseStateProgressing ClusterExtensionRevisionPhaseState = "Progressing" + // ClusterExtensionRevisionPhaseStateFailed indicates the phase has failed due to validation errors or collisions. + ClusterExtensionRevisionPhaseStateFailed ClusterExtensionRevisionPhaseState = "Failed" + // ClusterExtensionRevisionPhaseStatePending indicates the phase is waiting for previous phases to complete. + ClusterExtensionRevisionPhaseStatePending ClusterExtensionRevisionPhaseState = "Pending" + // ClusterExtensionRevisionPhaseStateTransitioning indicates objects in the phase are transitioning to a newer revision. + ClusterExtensionRevisionPhaseStateTransitioning ClusterExtensionRevisionPhaseState = "Transitioning" +) + +// ClusterExtensionRevisionProbeFailure describes a failing probe for an object in a phase. +type ClusterExtensionRevisionProbeFailure struct { + // kind is the Kind of the object failing probes. + // +required + Kind string `json:"kind"` + + // name is the name of the object failing probes. + // +required + Name string `json:"name"` + + // namespace is the namespace of the object failing probes. + // Empty for cluster-scoped objects. + // +optional + Namespace string `json:"namespace,omitempty"` + + // message contains the joined probe failure messages. + // +required + Message string `json:"message"` +} + +// ClusterExtensionRevisionPhaseStatus describes the status of a single phase. +type ClusterExtensionRevisionPhaseStatus struct { + // name is the name of the phase. + // +required + Name string `json:"name"` + + // state represents the current state of the phase. + // +required + // +kubebuilder:validation:Enum=Applied;Progressing;Failed;Pending;Transitioning + State ClusterExtensionRevisionPhaseState `json:"state"` + + // message provides additional context about the phase state. + // This may include error messages for failed phases. + // +optional + Message string `json:"message,omitempty"` + + // lastTransitionTime is the last time the phase state changed. + // This only updates when the state, message, or failingProbes change. + // +required + LastTransitionTime metav1.Time `json:"lastTransitionTime"` + + // failingProbes lists objects in this phase that are failing their readiness probes. + // Only populated when state is Progressing and probes are failing. + // +optional + // +listType=atomic + FailingProbes []ClusterExtensionRevisionProbeFailure `json:"failingProbes,omitempty"` +} + // ClusterExtensionRevisionStatus defines the observed state of a ClusterExtensionRevision. type ClusterExtensionRevisionStatus struct { // conditions is an optional list of status conditions describing the state of the // ClusterExtensionRevision. // - // The Progressing condition represents whether the revision is actively rolling out: - // - When status is True and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and is in transition. - // - When status is True and reason is Retrying, the ClusterExtensionRevision has encountered an error that could be resolved on subsequent reconciliation attempts. - // - When status is True and reason is Succeeded, the ClusterExtensionRevision has reached the desired state. - // - When status is False and reason is Blocked, the ClusterExtensionRevision has encountered an error that requires manual intervention for recovery. + // The Ready condition represents whether the revision has been successfully rolled out and is ready: + // - When status is True and reason is Ready, all ClusterExtensionRevision resources have been applied and all progression probes are successful. + // - When status is False and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and objects are being applied. + // - When status is False and reason is Reconciling, the ClusterExtensionRevision is being reconciled or retrying after an error. + // - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes. + // - When status is False and reason is ValidationFailed, the revision failed preflight validation checks. + // - When status is False and reason is ObjectCollision, the revision encountered object ownership collisions. + // - When status is False and reason is ProgressDeadlineExceeded, the revision has not completed within the specified progress deadline. + // - When status is False and reason is Transitioning, the revision's objects are being transitioned to a newer revision. // - When status is False and reason is Archived, the ClusterExtensionRevision is archived and not being actively reconciled. // - // The Available condition represents whether the revision has been successfully rolled out and is available: - // - When status is True and reason is ProbesSucceeded, the ClusterExtensionRevision has been successfully rolled out and all objects pass their readiness probes. - // - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes during rollout. - // - When status is Unknown and reason is Reconciling, the ClusterExtensionRevision has encountered an error that prevented it from observing the probes. - // - When status is Unknown and reason is Archived, the ClusterExtensionRevision has been archived and its objects have been torn down. - // - When status is Unknown and reason is Migrated, the ClusterExtensionRevision was migrated from an existing release and object status probe results have not yet been observed. - // - // The Succeeded condition represents whether the revision has successfully completed its rollout: - // - When status is True and reason is Succeeded, the ClusterExtensionRevision has successfully completed its rollout. This condition is set once and persists even if the revision later becomes unavailable. - // // +listType=map // +listMapKey=type // +optional Conditions []metav1.Condition `json:"conditions,omitempty"` + + // phaseStatuses provides detailed status information for each phase in the revision. + // Each phase status includes the phase name, current state, any error messages, + // and details about failing probes if applicable. + // + // +listType=map + // +listMapKey=name + // +optional + PhaseStatuses []ClusterExtensionRevisionPhaseStatus `json:"phaseStatuses,omitempty"` } // +kubebuilder:object:root=true // +kubebuilder:resource:scope=Cluster // +kubebuilder:subresource:status -// +kubebuilder:printcolumn:name="Available",type=string,JSONPath=`.status.conditions[?(@.type=='Available')].status` -// +kubebuilder:printcolumn:name="Progressing",type=string,JSONPath=`.status.conditions[?(@.type=='Progressing')].status` +// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=='Ready')].status` +// +kubebuilder:printcolumn:name="Reason",type=string,JSONPath=`.status.conditions[?(@.type=='Ready')].reason` // +kubebuilder:printcolumn:name=Age,type=date,JSONPath=`.metadata.creationTimestamp` // ClusterExtensionRevision represents an immutable snapshot of Kubernetes objects diff --git a/api/v1/validation_test.go b/api/v1/validation_test.go index bbd755c3c..583dfe7af 100644 --- a/api/v1/validation_test.go +++ b/api/v1/validation_test.go @@ -88,52 +88,6 @@ func TestValidate(t *testing.T) { }, want: want{valid: true}, }, - "ClusterExtensionRevision: invalid progress deadline < 10": { - args: args{ - object: ClusterExtensionRevisionSpec{ - ProgressDeadlineMinutes: 9, - }, - }, - want: want{valid: false}, - }, - "ClusterExtensionRevision: valid progress deadline = 10": { - args: args{ - object: ClusterExtensionRevisionSpec{ - ProgressDeadlineMinutes: 10, - }, - }, - want: want{valid: true}, - }, - "ClusterExtensionRevision: valid progress deadline = 360": { - args: args{ - object: ClusterExtensionRevisionSpec{ - ProgressDeadlineMinutes: 360, - }, - }, - want: want{valid: true}, - }, - "ClusterExtensionRevision: valid progress deadline = 720": { - args: args{ - object: ClusterExtensionRevisionSpec{ - ProgressDeadlineMinutes: 720, - }, - }, - want: want{valid: true}, - }, - "ClusterExtensionRevision: invalid progress deadline > 720": { - args: args{ - object: ClusterExtensionRevisionSpec{ - ProgressDeadlineMinutes: 721, - }, - }, - want: want{valid: false}, - }, - "ClusterExtensionRevision: no progress deadline set": { - args: args{ - object: ClusterExtensionRevisionSpec{}, - }, - want: want{valid: true}, - }, } { t.Run(name, func(t *testing.T) { var obj client.Object diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 0c5c67c16..50da78698 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -436,6 +436,42 @@ func (in *ClusterExtensionRevisionPhase) DeepCopy() *ClusterExtensionRevisionPha return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClusterExtensionRevisionPhaseStatus) DeepCopyInto(out *ClusterExtensionRevisionPhaseStatus) { + *out = *in + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + if in.FailingProbes != nil { + in, out := &in.FailingProbes, &out.FailingProbes + *out = make([]ClusterExtensionRevisionProbeFailure, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterExtensionRevisionPhaseStatus. +func (in *ClusterExtensionRevisionPhaseStatus) DeepCopy() *ClusterExtensionRevisionPhaseStatus { + if in == nil { + return nil + } + out := new(ClusterExtensionRevisionPhaseStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClusterExtensionRevisionProbeFailure) DeepCopyInto(out *ClusterExtensionRevisionProbeFailure) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterExtensionRevisionProbeFailure. +func (in *ClusterExtensionRevisionProbeFailure) DeepCopy() *ClusterExtensionRevisionProbeFailure { + if in == nil { + return nil + } + out := new(ClusterExtensionRevisionProbeFailure) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ClusterExtensionRevisionSpec) DeepCopyInto(out *ClusterExtensionRevisionSpec) { *out = *in @@ -468,6 +504,13 @@ func (in *ClusterExtensionRevisionStatus) DeepCopyInto(out *ClusterExtensionRevi (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.PhaseStatuses != nil { + in, out := &in.PhaseStatuses, &out.PhaseStatuses + *out = make([]ClusterExtensionRevisionPhaseStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterExtensionRevisionStatus. diff --git a/cmd/operator-controller/main.go b/cmd/operator-controller/main.go index dd0bc98f6..3e99cecd1 100644 --- a/cmd/operator-controller/main.go +++ b/cmd/operator-controller/main.go @@ -632,6 +632,7 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl controllers.ResolveBundle(c.resolver, c.mgr.GetClient()), controllers.UnpackBundle(c.imagePuller, c.imageCache), controllers.ApplyBundleWithBoxcutter(appl.Apply), + controllers.CheckProgressDeadline(&ceReconciler.ProgressDeadlineCheckInFlight), } baseDiscoveryClient, err := discovery.NewDiscoveryClientForConfig(c.mgr.GetConfig()) @@ -751,6 +752,7 @@ func (c *helmReconcilerConfigurator) Configure(ceReconciler *controllers.Cluster controllers.ResolveBundle(c.resolver, c.mgr.GetClient()), controllers.UnpackBundle(c.imagePuller, c.imageCache), controllers.ApplyBundle(appl), + controllers.CheckProgressDeadline(&ceReconciler.ProgressDeadlineCheckInFlight), } return nil diff --git a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml index 31e267b30..73ecf1847 100644 --- a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml +++ b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml @@ -16,11 +16,11 @@ spec: scope: Cluster versions: - additionalPrinterColumns: - - jsonPath: .status.conditions[?(@.type=='Available')].status - name: Available + - jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready type: string - - jsonPath: .status.conditions[?(@.type=='Progressing')].status - name: Progressing + - jsonPath: .status.conditions[?(@.type=='Ready')].reason + name: Reason type: string - jsonPath: .metadata.creationTimestamp name: Age @@ -166,16 +166,6 @@ spec: x-kubernetes-validations: - message: phases is immutable rule: self == oldSelf || oldSelf.size() == 0 - progressDeadlineMinutes: - description: |- - progressDeadlineMinutes is an optional field that defines the maximum period - of time in minutes after which an installation should be considered failed and - require manual intervention. This functionality is disabled when no value - is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). - format: int32 - maximum: 720 - minimum: 10 - type: integer revision: description: |- revision is a required, immutable sequence number representing a specific revision @@ -202,22 +192,16 @@ spec: conditions is an optional list of status conditions describing the state of the ClusterExtensionRevision. - The Progressing condition represents whether the revision is actively rolling out: - - When status is True and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and is in transition. - - When status is True and reason is Retrying, the ClusterExtensionRevision has encountered an error that could be resolved on subsequent reconciliation attempts. - - When status is True and reason is Succeeded, the ClusterExtensionRevision has reached the desired state. - - When status is False and reason is Blocked, the ClusterExtensionRevision has encountered an error that requires manual intervention for recovery. + The Ready condition represents whether the revision has been successfully rolled out and is ready: + - When status is True and reason is Ready, all ClusterExtensionRevision resources have been applied and all progression probes are successful. + - When status is False and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and objects are being applied. + - When status is False and reason is Reconciling, the ClusterExtensionRevision is being reconciled or retrying after an error. + - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes. + - When status is False and reason is ValidationFailed, the revision failed preflight validation checks. + - When status is False and reason is ObjectCollision, the revision encountered object ownership collisions. + - When status is False and reason is ProgressDeadlineExceeded, the revision has not completed within the specified progress deadline. + - When status is False and reason is Transitioning, the revision's objects are being transitioned to a newer revision. - When status is False and reason is Archived, the ClusterExtensionRevision is archived and not being actively reconciled. - - The Available condition represents whether the revision has been successfully rolled out and is available: - - When status is True and reason is ProbesSucceeded, the ClusterExtensionRevision has been successfully rolled out and all objects pass their readiness probes. - - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes during rollout. - - When status is Unknown and reason is Reconciling, the ClusterExtensionRevision has encountered an error that prevented it from observing the probes. - - When status is Unknown and reason is Archived, the ClusterExtensionRevision has been archived and its objects have been torn down. - - When status is Unknown and reason is Migrated, the ClusterExtensionRevision was migrated from an existing release and object status probe results have not yet been observed. - - The Succeeded condition represents whether the revision has successfully completed its rollout: - - When status is True and reason is Succeeded, the ClusterExtensionRevision has successfully completed its rollout. This condition is set once and persists even if the revision later becomes unavailable. items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -276,6 +260,77 @@ spec: x-kubernetes-list-map-keys: - type x-kubernetes-list-type: map + phaseStatuses: + description: |- + phaseStatuses provides detailed status information for each phase in the revision. + Each phase status includes the phase name, current state, any error messages, + and details about failing probes if applicable. + items: + description: ClusterExtensionRevisionPhaseStatus describes the status + of a single phase. + properties: + failingProbes: + description: |- + failingProbes lists objects in this phase that are failing their readiness probes. + Only populated when state is Progressing and probes are failing. + items: + description: ClusterExtensionRevisionProbeFailure describes + a failing probe for an object in a phase. + properties: + kind: + description: kind is the Kind of the object failing probes. + type: string + message: + description: message contains the joined probe failure + messages. + type: string + name: + description: name is the name of the object failing probes. + type: string + namespace: + description: |- + namespace is the namespace of the object failing probes. + Empty for cluster-scoped objects. + type: string + required: + - kind + - message + - name + type: object + type: array + x-kubernetes-list-type: atomic + lastTransitionTime: + description: |- + lastTransitionTime is the last time the phase state changed. + This only updates when the state, message, or failingProbes change. + format: date-time + type: string + message: + description: |- + message provides additional context about the phase state. + This may include error messages for failed phases. + type: string + name: + description: name is the name of the phase. + type: string + state: + description: state represents the current state of the phase. + enum: + - Applied + - Progressing + - Failed + - Pending + - Transitioning + type: string + required: + - lastTransitionTime + - name + - state + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object type: object served: true diff --git a/internal/operator-controller/applier/boxcutter.go b/internal/operator-controller/applier/boxcutter.go index 5f10716c7..016fde897 100644 --- a/internal/operator-controller/applier/boxcutter.go +++ b/internal/operator-controller/applier/boxcutter.go @@ -222,9 +222,6 @@ func (r *SimpleRevisionGenerator) buildClusterExtensionRevision( Phases: PhaseSort(objects), }, } - if p := ext.Spec.ProgressDeadlineMinutes; p > 0 { - cer.Spec.ProgressDeadlineMinutes = p - } return cer } @@ -337,8 +334,8 @@ func (m *BoxcutterStorageMigrator) ensureMigratedRevisionStatus(ctx context.Cont if revisions[i].Spec.Revision != 1 { continue } - // Skip if already succeeded - status is already set correctly. - if meta.IsStatusConditionTrue(revisions[i].Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { + // Skip if already ready - status is already set correctly. + if meta.IsStatusConditionTrue(revisions[i].Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) { return nil } // Ensure revision 1 status is set correctly, including for previously migrated @@ -377,7 +374,7 @@ func (m *BoxcutterStorageMigrator) findLatestDeployedRelease(ac helmclient.Actio return latestDeployed, nil } -// ensureRevisionStatus ensures the revision has the Succeeded status condition set. +// ensureRevisionStatus ensures the revision has the Ready status condition set. // Returns nil if the status is already set or after successfully setting it. // Only sets status on revisions that were actually migrated from Helm (marked with MigratedFromHelmKey label). func (m *BoxcutterStorageMigrator) ensureRevisionStatus(ctx context.Context, rev *ocv1.ClusterExtensionRevision) error { @@ -387,23 +384,23 @@ func (m *BoxcutterStorageMigrator) ensureRevisionStatus(ctx context.Context, rev } // Only set status if this revision was actually migrated from Helm. - // This prevents us from incorrectly marking normal Boxcutter revision 1 as succeeded + // This prevents us from incorrectly marking normal Boxcutter revision 1 as ready // when it's still in progress. if rev.Labels[labels.MigratedFromHelmKey] != "true" { return nil } - // Check if status is already set to Succeeded=True - if meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { + // Check if status is already set to Ready=True + if meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) { return nil } - // Set the Succeeded status condition + // Set the Ready status condition meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeSucceeded, + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionTrue, - Reason: ocv1.ReasonSucceeded, - Message: "Revision succeeded - migrated from Helm release", + Reason: ocv1.ClusterExtensionRevisionReasonReady, + Message: "Revision ready - migrated from Helm release", ObservedGeneration: rev.GetGeneration(), }) diff --git a/internal/operator-controller/applier/boxcutter_test.go b/internal/operator-controller/applier/boxcutter_test.go index f7c1298ce..ac31adc6e 100644 --- a/internal/operator-controller/applier/boxcutter_test.go +++ b/internal/operator-controller/applier/boxcutter_test.go @@ -27,7 +27,6 @@ import ( "k8s.io/apiserver/pkg/authentication/user" "k8s.io/apiserver/pkg/authorization/authorizer" k8scheme "k8s.io/client-go/kubernetes/scheme" - "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/client/interceptor" @@ -425,65 +424,6 @@ func Test_SimpleRevisionGenerator_AppliesObjectLabelsAndRevisionAnnotations(t *t require.Equal(t, revAnnotations, rev.Annotations) } -func Test_SimpleRevisionGenerator_PropagatesProgressDeadlineMinutes(t *testing.T) { - r := &FakeManifestProvider{ - GetFn: func(b fs.FS, e *ocv1.ClusterExtension) ([]client.Object, error) { - return []client.Object{}, nil - }, - } - - b := applier.SimpleRevisionGenerator{ - Scheme: k8scheme.Scheme, - ManifestProvider: r, - } - - type args struct { - progressDeadlineMinutes *int32 - } - type want struct { - progressDeadlineMinutes int32 - } - type testCase struct { - args args - want want - } - for name, tc := range map[string]testCase{ - "propagates when set": { - args: args{ - progressDeadlineMinutes: ptr.To(int32(10)), - }, - want: want{ - progressDeadlineMinutes: 10, - }, - }, - "do not propagate when unset": { - want: want{ - progressDeadlineMinutes: 0, - }, - }, - } { - ext := &ocv1.ClusterExtension{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-extension", - }, - Spec: ocv1.ClusterExtensionSpec{ - Namespace: "test-namespace", - ServiceAccount: ocv1.ServiceAccountReference{Name: "test-sa"}, - }, - } - empty := map[string]string{} - t.Run(name, func(t *testing.T) { - if pd := tc.args.progressDeadlineMinutes; pd != nil { - ext.Spec.ProgressDeadlineMinutes = *pd - } - - rev, err := b.GenerateRevision(t.Context(), dummyBundle, ext, empty, empty) - require.NoError(t, err) - require.Equal(t, tc.want.progressDeadlineMinutes, rev.Spec.ProgressDeadlineMinutes) - }) - } -} - func Test_SimpleRevisionGenerator_Failure(t *testing.T) { r := &FakeManifestProvider{ GetFn: func(b fs.FS, e *ocv1.ClusterExtension) ([]client.Object, error) { @@ -1352,7 +1292,7 @@ func TestBoxcutterStorageMigrator(t *testing.T) { client.AssertExpectations(t) - // Verify the migrated revision has Succeeded=True status with Succeeded reason and a migration message + // Verify the migrated revision has Ready=True status with Succeeded reason and a migration message statusWriter := client.Status().(*statusWriterMock) require.True(t, statusWriter.updateCalled, "Status().Update() should be called during migration") require.NotNil(t, statusWriter.updatedObj, "Updated object should not be nil") @@ -1360,12 +1300,12 @@ func TestBoxcutterStorageMigrator(t *testing.T) { rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) require.True(t, ok, "Updated object should be a ClusterExtensionRevision") - succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) - require.NotNil(t, succeededCond, "Succeeded condition should be set") - assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be True") - assert.Equal(t, ocv1.ReasonSucceeded, succeededCond.Reason, "Reason should be Succeeded") - assert.Equal(t, "Revision succeeded - migrated from Helm release", succeededCond.Message, "Message should indicate Helm migration") - assert.Equal(t, int64(1), succeededCond.ObservedGeneration, "ObservedGeneration should match revision generation") + readyCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) + require.NotNil(t, readyCond, "Ready condition should be set") + assert.Equal(t, metav1.ConditionTrue, readyCond.Status, "Ready condition should be True") + assert.Equal(t, ocv1.ClusterExtensionRevisionReasonReady, readyCond.Reason, "Reason should be Succeeded") + assert.Equal(t, "Revision ready - migrated from Helm release", readyCond.Message, "Message should indicate Helm migration") + assert.Equal(t, int64(1), readyCond.ObservedGeneration, "ObservedGeneration should match revision generation") }) t.Run("does not create revision when revisions exist", func(t *testing.T) { @@ -1400,9 +1340,9 @@ func TestBoxcutterStorageMigrator(t *testing.T) { Status: ocv1.ClusterExtensionRevisionStatus{ Conditions: []metav1.Condition{ { - Type: ocv1.ClusterExtensionRevisionTypeSucceeded, + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionTrue, - Reason: ocv1.ReasonSucceeded, + Reason: ocv1.ClusterExtensionRevisionReasonReady, }, }, }, @@ -1483,10 +1423,10 @@ func TestBoxcutterStorageMigrator(t *testing.T) { rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) require.True(t, ok, "Updated object should be a ClusterExtensionRevision") - succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) - require.NotNil(t, succeededCond, "Succeeded condition should be set") - assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be True") - assert.Equal(t, ocv1.ReasonSucceeded, succeededCond.Reason, "Reason should be Succeeded") + readyCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) + require.NotNil(t, readyCond, "Ready condition should be set") + assert.Equal(t, metav1.ConditionTrue, readyCond.Status, "Ready condition should be True") + assert.Equal(t, ocv1.ClusterExtensionRevisionReasonReady, readyCond.Reason, "Reason should be Succeeded") }) t.Run("updates status from False to True for migrated revision", func(t *testing.T) { @@ -1507,8 +1447,8 @@ func TestBoxcutterStorageMigrator(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "test123"}, } - // Migrated revision with Succeeded=False (e.g., from a previous failed status update attempt) - // This simulates a revision whose Succeeded condition should be corrected from False to True during migration. + // Migrated revision with Ready=False (e.g., from a previous failed status update attempt) + // This simulates a revision whose Ready condition should be corrected from False to True during migration. existingRev := ocv1.ClusterExtensionRevision{ ObjectMeta: metav1.ObjectMeta{ Name: "test-revision", @@ -1523,7 +1463,7 @@ func TestBoxcutterStorageMigrator(t *testing.T) { Status: ocv1.ClusterExtensionRevisionStatus{ Conditions: []metav1.Condition{ { - Type: ocv1.ClusterExtensionRevisionTypeSucceeded, + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionFalse, // Important: False, not missing Reason: "InProgress", }, @@ -1560,10 +1500,10 @@ func TestBoxcutterStorageMigrator(t *testing.T) { rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) require.True(t, ok, "Updated object should be a ClusterExtensionRevision") - succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) - require.NotNil(t, succeededCond, "Succeeded condition should be set") - assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be updated to True") - assert.Equal(t, ocv1.ReasonSucceeded, succeededCond.Reason, "Reason should be Succeeded") + readyCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) + require.NotNil(t, readyCond, "Ready condition should be set") + assert.Equal(t, metav1.ConditionTrue, readyCond.Status, "Ready condition should be updated to True") + assert.Equal(t, ocv1.ClusterExtensionRevisionReasonReady, readyCond.Reason, "Reason should be Succeeded") }) t.Run("does not set status on non-migrated revision 1", func(t *testing.T) { @@ -1707,7 +1647,7 @@ func TestBoxcutterStorageMigrator(t *testing.T) { assert.Equal(t, 2, brb.helmReleaseUsed.Version, "Should use version 2 (deployed), not version 3 (failed)") assert.Equal(t, release.StatusDeployed, brb.helmReleaseUsed.Info.Status, "Should use deployed release") - // Verify the migrated revision has Succeeded=True status + // Verify the migrated revision has Ready=True status statusWriter := client.Status().(*statusWriterMock) require.True(t, statusWriter.updateCalled, "Status().Update() should be called during migration") require.NotNil(t, statusWriter.updatedObj, "Updated object should not be nil") @@ -1715,9 +1655,9 @@ func TestBoxcutterStorageMigrator(t *testing.T) { rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) require.True(t, ok, "Updated object should be a ClusterExtensionRevision") - succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) - require.NotNil(t, succeededCond, "Succeeded condition should be set") - assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be True") + readyCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) + require.NotNil(t, readyCond, "Ready condition should be set") + assert.Equal(t, metav1.ConditionTrue, readyCond.Status, "Ready condition should be True") }) t.Run("does not create revision when helm release is not deployed and no deployed history", func(t *testing.T) { diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go index dfb5dad14..cea7f924a 100644 --- a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go @@ -23,8 +23,10 @@ import ( "fmt" "io/fs" "slices" + "strconv" apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -38,7 +40,7 @@ type BoxcutterRevisionStatesGetter struct { Reader client.Reader } -func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (*RevisionStates, error) { +func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (RevisionStates, error) { // TODO: boxcutter applier has a nearly identical bit of code for listing and sorting revisions // only difference here is that it sorts in reverse order to start iterating with the most // recent revisions. We should consolidate to avoid code duplication. @@ -52,7 +54,7 @@ func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, e return cmp.Compare(a.Spec.Revision, b.Spec.Revision) }) - rs := &RevisionStates{} + var rs RevisionStates for _, rev := range existingRevisionList.Items { if rev.Spec.LifecycleState == ocv1.ClusterExtensionRevisionLifecycleStateArchived { continue @@ -61,22 +63,32 @@ func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, e // TODO: the setting of these annotations (happens in boxcutter applier when we pass in "revisionAnnotations") // is fairly decoupled from this code where we get the annotations back out. We may want to co-locate // the set/get logic a bit better to make it more maintainable and less likely to get out of sync. + state := RevisionStateRollingOut + if apimeta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) { + state = RevisionStateInstalled + } + + generation := int64(0) + if genStr, ok := rev.Annotations[labels.ClusterExtensionGenerationKey]; ok { + if gen, err := strconv.ParseInt(genStr, 10, 64); err == nil { + generation = gen + } + } + rm := &RevisionMetadata{ - RevisionName: rev.Name, - Package: rev.Annotations[labels.PackageNameKey], - Image: rev.Annotations[labels.BundleReferenceKey], - Conditions: rev.Status.Conditions, + RevisionName: rev.Name, + Package: rev.Annotations[labels.PackageNameKey], + Image: rev.Annotations[labels.BundleReferenceKey], + State: state, + ClusterExtensionGeneration: generation, + Conditions: rev.Status.Conditions, BundleMetadata: ocv1.BundleMetadata{ Name: rev.Annotations[labels.BundleNameKey], Version: rev.Annotations[labels.BundleVersionKey], }, } - if apimeta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { - rs.Installed = rm - } else { - rs.RollingOut = append(rs.RollingOut, rm) - } + rs = append(rs, rm) } return rs, nil @@ -100,10 +112,11 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) revisionAnnotations := map[string]string{ - labels.BundleNameKey: state.resolvedRevisionMetadata.Name, - labels.PackageNameKey: state.resolvedRevisionMetadata.Package, - labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, - labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.BundleNameKey: state.resolvedRevisionMetadata.Name, + labels.PackageNameKey: state.resolvedRevisionMetadata.Package, + labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, + labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.ClusterExtensionGenerationKey: fmt.Sprintf("%d", ext.GetGeneration()), } objLbls := map[string]string{ labels.OwnerKindKey: ocv1.ClusterExtensionKind, @@ -126,38 +139,49 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e } ext.Status.ActiveRevisions = []ocv1.RevisionStatus{} - // Mirror Available/Progressing conditions from the installed revision - if i := state.revisionStates.Installed; i != nil { - for _, cndType := range []string{ocv1.ClusterExtensionRevisionTypeAvailable, ocv1.ClusterExtensionRevisionTypeProgressing} { - if cnd := apimeta.FindStatusCondition(i.Conditions, cndType); cnd != nil { - cnd.ObservedGeneration = ext.GetGeneration() - apimeta.SetStatusCondition(&ext.Status.Conditions, *cnd) - } + // Mirror Ready condition from the installed revision + if i := state.revisionStates.Installed(); i != nil { + if cnd := apimeta.FindStatusCondition(i.Conditions, ocv1.ClusterExtensionRevisionTypeReady); cnd != nil { + cnd.ObservedGeneration = ext.GetGeneration() + apimeta.SetStatusCondition(&ext.Status.Conditions, *cnd) } ext.Status.Install = &ocv1.ClusterExtensionInstallStatus{ Bundle: i.BundleMetadata, } ext.Status.ActiveRevisions = []ocv1.RevisionStatus{{Name: i.RevisionName}} } - for idx, r := range state.revisionStates.RollingOut { + for idx, r := range state.revisionStates.RollingOut() { rs := ocv1.RevisionStatus{Name: r.RevisionName} - for _, cndType := range []string{ocv1.ClusterExtensionRevisionTypeAvailable, ocv1.ClusterExtensionRevisionTypeProgressing} { - if cnd := apimeta.FindStatusCondition(r.Conditions, cndType); cnd != nil { - cnd.ObservedGeneration = ext.GetGeneration() - apimeta.SetStatusCondition(&rs.Conditions, *cnd) - } + if cnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeReady); cnd != nil { + cnd.ObservedGeneration = ext.GetGeneration() + apimeta.SetStatusCondition(&rs.Conditions, *cnd) } - // Mirror Progressing condition from the latest active revision - if idx == len(state.revisionStates.RollingOut)-1 { - if pcnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing); pcnd != nil { - pcnd.ObservedGeneration = ext.GetGeneration() - apimeta.SetStatusCondition(&ext.Status.Conditions, *pcnd) + // Mirror Ready condition from the latest active revision + if idx == len(state.revisionStates.RollingOut())-1 { + if rcnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeReady); rcnd != nil { + rcnd.ObservedGeneration = ext.GetGeneration() + apimeta.SetStatusCondition(&ext.Status.Conditions, *rcnd) } } ext.Status.ActiveRevisions = append(ext.Status.ActiveRevisions, rs) } setInstalledStatusFromRevisionStates(ext, state.revisionStates) + + // Set Progressing condition based on revision states + // If there's an installed revision and no rolling revisions, we've reached the desired state + if state.revisionStates.Installed() != nil && len(state.revisionStates.RollingOut()) == 0 { + setStatusProgressing(ext, nil) + } else { + // There are rolling revisions, so we're still progressing + SetStatusCondition(&ext.Status.Conditions, metav1.Condition{ + Type: ocv1.TypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonRollingOut, + Message: "Rolling out new revision", + ObservedGeneration: ext.GetGeneration(), + }) + } return nil, nil } } diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go index 78adb7009..ece84e1fb 100644 --- a/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps_apply_test.go @@ -31,7 +31,7 @@ import ( func TestApplyBundleWithBoxcutter(t *testing.T) { type args struct { activeRevisions []ocv1.RevisionStatus - revisionStates *RevisionStates + revisionStates RevisionStates } type want struct { activeRevisions []ocv1.RevisionStatus @@ -48,16 +48,18 @@ func TestApplyBundleWithBoxcutter(t *testing.T) { activeRevisions: []ocv1.RevisionStatus{ {Name: "ce-1"}, }, - revisionStates: &RevisionStates{ - Installed: &RevisionMetadata{ + revisionStates: RevisionStates{ + { RevisionName: "ce-1", + State: RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{ Name: "test-bundle", Version: "1.0.0", }, }, - RollingOut: []*RevisionMetadata{ - {RevisionName: "ce-2"}, + { + RevisionName: "ce-2", + State: RevisionStateRollingOut, }, }, }, @@ -74,9 +76,10 @@ func TestApplyBundleWithBoxcutter(t *testing.T) { activeRevisions: []ocv1.RevisionStatus{ {Name: "ce-1"}, }, - revisionStates: &RevisionStates{ - Installed: &RevisionMetadata{ + revisionStates: RevisionStates{ + { RevisionName: "ce-2", + State: RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{ Name: "test-bundle", Version: "1.0.1", @@ -96,9 +99,10 @@ func TestApplyBundleWithBoxcutter(t *testing.T) { activeRevisions: []ocv1.RevisionStatus{ {Name: "ce-1"}, }, - revisionStates: &RevisionStates{ - RollingOut: []*RevisionMetadata{ - {RevisionName: "ce-1"}, + revisionStates: RevisionStates{ + { + RevisionName: "ce-1", + State: RevisionStateRollingOut, }, }, }, diff --git a/internal/operator-controller/controllers/clusterextension_controller.go b/internal/operator-controller/controllers/clusterextension_controller.go index a3fcf48f3..ee9c9ff70 100644 --- a/internal/operator-controller/controllers/clusterextension_controller.go +++ b/internal/operator-controller/controllers/clusterextension_controller.go @@ -22,7 +22,9 @@ import ( "fmt" "io/fs" "slices" + "strconv" "strings" + "sync" "github.com/go-logr/logr" "helm.sh/helm/v3/pkg/release" @@ -60,7 +62,7 @@ const ( ) type reconcileState struct { - revisionStates *RevisionStates + revisionStates RevisionStates resolvedRevisionMetadata *RevisionMetadata imageFS fs.FS } @@ -100,6 +102,9 @@ func (steps *ReconcileSteps) Reconcile(ctx context.Context, ext *ocv1.ClusterExt type ClusterExtensionReconciler struct { client.Client ReconcileSteps ReconcileSteps + // ProgressDeadlineCheckInFlight tracks if we have queued up the reconciliation that detects eventual progress deadline issues + // key is ClusterExtension UID, value is boolean + ProgressDeadlineCheckInFlight sync.Map } type StorageMigrator interface { @@ -114,7 +119,7 @@ type Applier interface { } type RevisionStatesGetter interface { - GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (*RevisionStates, error) + GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (RevisionStates, error) } // The operator controller needs to watch all the bundle objects and reconcile accordingly. Though not ideal, but these permissions are required. @@ -496,24 +501,61 @@ func clusterExtensionRequestsForCatalog(c client.Reader, logger logr.Logger) crh } } +type RevisionState string + +const ( + RevisionStateInstalled RevisionState = "Installed" + RevisionStateRollingOut RevisionState = "RollingOut" +) + type RevisionMetadata struct { - RevisionName string - Package string - Image string + RevisionName string + Package string + Image string + State RevisionState + ClusterExtensionGeneration int64 ocv1.BundleMetadata Conditions []metav1.Condition } -type RevisionStates struct { - Installed *RevisionMetadata - RollingOut []*RevisionMetadata +// RevisionStates is a sorted list of active revision metadata, sorted ascending by revision number. +// The last element is the most recent revision. +type RevisionStates []*RevisionMetadata + +// Installed returns the installed revision metadata, or nil if none exists. +func (rs RevisionStates) Installed() *RevisionMetadata { + for _, r := range rs { + if r.State == RevisionStateInstalled { + return r + } + } + return nil +} + +// RollingOut returns all revisions that are currently rolling out. +func (rs RevisionStates) RollingOut() []*RevisionMetadata { + var rolling []*RevisionMetadata + for _, r := range rs { + if r.State == RevisionStateRollingOut { + rolling = append(rolling, r) + } + } + return rolling +} + +// Latest returns the most recent revision, or nil if there are no revisions. +func (rs RevisionStates) Latest() *RevisionMetadata { + if len(rs) == 0 { + return nil + } + return rs[len(rs)-1] } type HelmRevisionStatesGetter struct { helmclient.ActionClientGetter } -func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (*RevisionStates, error) { +func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (RevisionStates, error) { cl, err := d.ActionClientFor(ctx, ext) if err != nil { return nil, err @@ -523,7 +565,8 @@ func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *o if err != nil && !errors.Is(err, driver.ErrReleaseNotFound) { return nil, err } - rs := &RevisionStates{} + + var rs RevisionStates if len(relhis) == 0 { return rs, nil } @@ -532,14 +575,23 @@ func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *o // But we need to look for the most-recent _Deployed_ release for _, rel := range relhis { if rel.Info != nil && rel.Info.Status == release.StatusDeployed { - rs.Installed = &RevisionMetadata{ - Package: rel.Labels[labels.PackageNameKey], - Image: rel.Labels[labels.BundleReferenceKey], + generation := int64(0) + if genStr, ok := rel.Labels[labels.ClusterExtensionGenerationKey]; ok { + if gen, err := strconv.ParseInt(genStr, 10, 64); err == nil { + generation = gen + } + } + + rs = RevisionStates{&RevisionMetadata{ + Package: rel.Labels[labels.PackageNameKey], + Image: rel.Labels[labels.BundleReferenceKey], + State: RevisionStateInstalled, + ClusterExtensionGeneration: generation, BundleMetadata: ocv1.BundleMetadata{ Name: rel.Labels[labels.BundleNameKey], Version: rel.Labels[labels.BundleVersionKey], }, - } + }} break } } diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index 28d766ace..c5a0296b5 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -280,9 +280,10 @@ func TestClusterExtensionUpgradeShowsInstalledBundleDeprecation(t *testing.T) { }, nil }) d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { Package: pkgName, + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{ Name: installedBundleName, // v1.0.0 installed Version: "1.0.0", @@ -365,9 +366,10 @@ func TestClusterExtensionResolutionFailsWithoutCatalogDeprecationData(t *testing }) d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { Package: pkgName, + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{ Name: installedBundleName, Version: "1.0.0", @@ -681,8 +683,11 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te // Boxcutter keeps a rolling revision when apply fails. We mirror that state so the test uses // the same inputs the runtime would see. d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - RollingOut: []*controllers.RevisionMetadata{{}}, + RevisionStates: controllers.RevisionStates{ + { + State: controllers.RevisionStateRollingOut, + ClusterExtensionGeneration: 1, // Match the ClusterExtension generation + }, }, } d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { @@ -843,8 +848,9 @@ func TestClusterExtensionApplierFailsWithBundleInstalled(t *testing.T) { }) d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{Name: "prometheus.v1.0.0", Version: "1.0.0"}, Image: "quay.io/operatorhubio/prometheus@fake1.0.0", }, @@ -1181,8 +1187,9 @@ func TestClusterExtensionDeleteFinalizerFails(t *testing.T) { installCompleted: true, } d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{Name: "prometheus.v1.0.0", Version: "1.0.0"}, Image: "quay.io/operatorhubio/prometheus@fake1.0.0", }, @@ -2380,6 +2387,7 @@ func TestGetInstalledBundleHistory(t *testing.T) { }, nil, &controllers.RevisionMetadata{ + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{ Name: "test-ext", Version: "1.0", @@ -2415,6 +2423,7 @@ func TestGetInstalledBundleHistory(t *testing.T) { }, nil, &controllers.RevisionMetadata{ + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{ Name: "test-ext", Version: "1.0", @@ -2433,8 +2442,8 @@ func TestGetInstalledBundleHistory(t *testing.T) { require.Nil(t, md) } else { require.NoError(t, err) - require.Equal(t, tst.expectedInstalled, md.Installed) - require.Nil(t, md.RollingOut) + require.Equal(t, tst.expectedInstalled, md.Installed()) + require.Empty(t, md.RollingOut()) } } } @@ -2467,9 +2476,10 @@ func TestResolutionFallbackToInstalledBundle(t *testing.T) { } d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { Package: "test-pkg", + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, Image: "test-image:1.0.0", }, @@ -2553,8 +2563,9 @@ func TestResolutionFallbackToInstalledBundle(t *testing.T) { return nil, nil, nil, fmt.Errorf("catalog unavailable") }) d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, }, }, @@ -2624,9 +2635,10 @@ func TestResolutionFallbackToInstalledBundle(t *testing.T) { }, &v, &declcfg.Deprecation{}, nil }) d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { Package: "test-pkg", + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, Image: "test-image:1.0.0", }, @@ -2711,9 +2723,10 @@ func TestResolutionFallbackToInstalledBundle(t *testing.T) { return nil, nil, nil, fmt.Errorf("transient catalog issue: cache stale") }) d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ + RevisionStates: controllers.RevisionStates{ + { Package: "test-pkg", + State: controllers.RevisionStateInstalled, BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"}, Image: "test-image:1.0.0", }, diff --git a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go index 16d691f8b..bf1b25705 100644 --- a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go +++ b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go @@ -20,6 +20,8 @@ import ( "context" "errors" "fmt" + "sync" + "time" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -95,31 +97,40 @@ func RetrieveRevisionStates(r RevisionStatesGetter) ReconcileStepFunc { func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) - - // If already rolling out, use existing revision and set deprecation to Unknown (no catalog check) - if len(state.revisionStates.RollingOut) > 0 { - installedBundleName := "" - if state.revisionStates.Installed != nil { - installedBundleName = state.revisionStates.Installed.Name + l.Info("resolving bundle") + + // If the latest revision is already rolling out for the current generation, skip resolution + // This prevents re-resolving when a revision is already being deployed for the current spec + if latest := state.revisionStates.Latest(); latest != nil { + if latest.State == RevisionStateRollingOut && latest.ClusterExtensionGeneration == ext.GetGeneration() { + l.Info("skipping resolver - latest revision already rolling out for current generation", + "revisionName", latest.RevisionName, + "generation", ext.GetGeneration()) + installedBundleName := "" + if state.revisionStates.Installed() != nil { + l.Info("installed bundle", "name", state.revisionStates.Installed().Name) + installedBundleName = state.revisionStates.Installed().Name + } + SetDeprecationStatus(ext, installedBundleName, nil, false) + state.resolvedRevisionMetadata = latest + return nil, nil } - SetDeprecationStatus(ext, installedBundleName, nil, false) - state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0] - return nil, nil } // Resolve a new bundle from the catalog l.V(1).Info("resolving bundle") var bm *ocv1.BundleMetadata - if state.revisionStates.Installed != nil { - bm = &state.revisionStates.Installed.BundleMetadata + if state.revisionStates.Installed() != nil { + l.Info("installed bundle", "name", state.revisionStates.Installed().Name) + bm = &state.revisionStates.Installed().BundleMetadata } resolvedBundle, resolvedBundleVersion, resolvedDeprecation, err := r.Resolve(ctx, ext, bm) // Get the installed bundle name for deprecation status. // BundleDeprecated should reflect what's currently running, not what we're trying to install. installedBundleName := "" - if state.revisionStates.Installed != nil { - installedBundleName = state.revisionStates.Installed.Name + if state.revisionStates.Installed() != nil { + installedBundleName = state.revisionStates.Installed().Name } // Set deprecation status based on resolution results: @@ -142,9 +153,11 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { return handleResolutionError(ctx, c, state, ext, err) } + l.Info("resolved bundle", "name", resolvedBundle.Name, "version", resolvedBundleVersion.Version.String(), "deprecation", resolvedDeprecation) state.resolvedRevisionMetadata = &RevisionMetadata{ - Package: resolvedBundle.Package, - Image: resolvedBundle.Image, + Package: resolvedBundle.Package, + Image: resolvedBundle.Image, + ClusterExtensionGeneration: ext.GetGeneration(), // TODO: Right now, operator-controller only supports registry+v1 bundles and has no concept // of a "release" field. If/when we add a release field concept or a new bundle format // we need to re-evaluate use of `AsLegacyRegistryV1Version` so that we avoid propagating @@ -172,7 +185,7 @@ func handleResolutionError(ctx context.Context, c client.Client, state *reconcil l := log.FromContext(ctx) // No installed bundle and resolution failed - cannot proceed - if state.revisionStates.Installed == nil { + if state.revisionStates.Installed() == nil { msg := fmt.Sprintf("failed to resolve bundle: %v", err) setStatusProgressing(ext, err) setInstalledStatusFromRevisionStates(ext, state.revisionStates) @@ -185,7 +198,7 @@ func handleResolutionError(ctx context.Context, c client.Client, state *reconcil if ext.Spec.Source.Catalog != nil { specVersion = ext.Spec.Source.Catalog.Version } - installedVersion := state.revisionStates.Installed.Version + installedVersion := state.revisionStates.Installed().Version // If spec requests a different version, we cannot fall back - must fail and retry if specVersion != "" && specVersion != installedVersion { @@ -246,11 +259,11 @@ func handleResolutionError(ctx context.Context, c client.Client, state *reconcil "resolutionError", err.Error(), "packageName", getPackageName(ext), "catalogName", catalogName, - "installedBundle", state.revisionStates.Installed.Name, - "installedVersion", state.revisionStates.Installed.Version) + "installedBundle", state.revisionStates.Installed().Name, + "installedVersion", state.revisionStates.Installed().Version) // Set installed status based on current revision states (needed before Apply runs) setInstalledStatusFromRevisionStates(ext, state.revisionStates) - state.resolvedRevisionMetadata = state.revisionStates.Installed + state.resolvedRevisionMetadata = state.revisionStates.Installed() // Return no error to allow Apply step to run and maintain resources. // Apply will set Progressing=Succeeded when it completes successfully. return nil, nil @@ -326,9 +339,9 @@ func UnpackBundle(i imageutil.Puller, cache imageutil.Cache) ReconcileStepFunc { // Check if resolved bundle matches installed bundle (no version change) bundleUnchanged := state.revisionStates != nil && - state.revisionStates.Installed != nil && - state.resolvedRevisionMetadata.Name == state.revisionStates.Installed.Name && - state.resolvedRevisionMetadata.Version == state.revisionStates.Installed.Version + state.revisionStates.Installed() != nil && + state.resolvedRevisionMetadata.Name == state.revisionStates.Installed().Name && + state.resolvedRevisionMetadata.Version == state.revisionStates.Installed().Version if err != nil { if bundleUnchanged { @@ -363,10 +376,11 @@ func ApplyBundle(a Applier) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) revisionAnnotations := map[string]string{ - labels.BundleNameKey: state.resolvedRevisionMetadata.Name, - labels.PackageNameKey: state.resolvedRevisionMetadata.Package, - labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, - labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.BundleNameKey: state.resolvedRevisionMetadata.Name, + labels.PackageNameKey: state.resolvedRevisionMetadata.Package, + labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, + labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.ClusterExtensionGenerationKey: fmt.Sprintf("%d", ext.GetGeneration()), } objLbls := map[string]string{ labels.OwnerKindKey: ocv1.ClusterExtensionKind, @@ -387,9 +401,13 @@ func ApplyBundle(a Applier) ReconcileStepFunc { // Set installed status if rolloutSucceeded { - state.revisionStates = &RevisionStates{Installed: state.resolvedRevisionMetadata} - } else if err == nil && state.revisionStates.Installed == nil && len(state.revisionStates.RollingOut) == 0 { - state.revisionStates = &RevisionStates{RollingOut: []*RevisionMetadata{state.resolvedRevisionMetadata}} + rm := state.resolvedRevisionMetadata + rm.State = RevisionStateInstalled + state.revisionStates = RevisionStates{rm} + } else if err == nil && state.revisionStates.Installed() == nil && len(state.revisionStates.RollingOut()) == 0 { + rm := state.resolvedRevisionMetadata + rm.State = RevisionStateRollingOut + state.revisionStates = RevisionStates{rm} } setInstalledStatusFromRevisionStates(ext, state.revisionStates) @@ -412,3 +430,86 @@ func ApplyBundle(a Applier) ReconcileStepFunc { return nil, nil } } + +// CheckProgressDeadline checks if the ClusterExtension has exceeded its progress deadline. +// It monitors the Progressing condition and active revision Ready conditions to detect: +// 1. Retrying transient errors (Progressing=True, Reason=Retrying): bundle resolution, image pull, API errors +// 2. Stuck rollouts (Progressing=True, Reason=RollingOut): revisions failing to reach Ready state +// +// When the deadline is exceeded, it sets Progressing=False with Reason=ProgressDeadlineExceeded +// to mark the failure as terminal. It tracks deadline checks per ClusterExtension UID to avoid +// scheduling redundant requeues. +func CheckProgressDeadline(progressDeadlineCheckInFlight *sync.Map) ReconcileStepFunc { + return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { + l := log.FromContext(ctx) + + pd := ext.Spec.ProgressDeadlineMinutes + if pd <= 0 { + // No progress deadline configured, clear tracking and skip check + progressDeadlineCheckInFlight.Delete(ext.GetUID()) + return nil, nil + } + + cnd := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing) + // Check progress deadline for: + // 1. Transient errors (Progressing=True with Reason=Retrying): bundle resolution, image pull, API errors, etc. + // 2. Stuck rollouts (Progressing=True with Reason=RollingOut): revisions that fail to reach Ready state + // Terminal errors (Progressing=False) like configuration validation failures are already terminal. + isStuckRetrying := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason == ocv1.ReasonRetrying + isRollingOut := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason == ocv1.ReasonRollingOut + + var timeSinceLastTransition time.Duration + var isStuck bool + + if isStuckRetrying { + // Retrying transient errors - use Progressing condition's LastTransitionTime + timeSinceLastTransition = time.Since(cnd.LastTransitionTime.Time) + isStuck = true + } else if isRollingOut { + // Rolling out - check if the latest active revision is stuck + // The latest revision is the one currently rolling out + if len(ext.Status.ActiveRevisions) > 0 { + latestRevStatus := ext.Status.ActiveRevisions[len(ext.Status.ActiveRevisions)-1] + readyCond := apimeta.FindStatusCondition(latestRevStatus.Conditions, ocv1.ClusterExtensionRevisionTypeReady) + if readyCond != nil && readyCond.Status == metav1.ConditionFalse { + // Latest revision is not ready - check how long it's been stuck + timeSinceLastTransition = time.Since(readyCond.LastTransitionTime.Time) + isStuck = true + } + } + } + + if !isStuck { + // Not stuck anymore (either progressing successfully, or already terminal), + // clear the deadline check tracking + progressDeadlineCheckInFlight.Delete(ext.GetUID()) + return nil, nil + } + + timeout := time.Duration(pd) * time.Minute + + if timeSinceLastTransition > timeout { + // Progress deadline exceeded - set terminal condition + l.Info("progress deadline exceeded", "deadline", pd, "timeSinceLastTransition", timeSinceLastTransition) + SetStatusCondition(&ext.Status.Conditions, metav1.Condition{ + Type: ocv1.TypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + Message: fmt.Sprintf("ClusterExtension has not progressed for %d minutes", pd), + ObservedGeneration: ext.GetGeneration(), + }) + // Return nil error and empty result since this is now terminal (no retry needed) + return &ctrl.Result{}, nil + } + + // Deadline not yet exceeded - schedule a requeue to check again later + // Only schedule if we haven't already scheduled one for this ClusterExtension + if _, found := progressDeadlineCheckInFlight.Load(ext.GetUID()); !found { + progressDeadlineCheckInFlight.Store(ext.GetUID(), true) + timeUntilDeadline := timeout - timeSinceLastTransition + return &ctrl.Result{RequeueAfter: timeUntilDeadline}, nil + } + + return nil, nil + } +} diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 138bcd8eb..c99237bad 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "strings" - "sync" "time" appsv1 "k8s.io/api/apps/v1" @@ -48,9 +47,6 @@ type ClusterExtensionRevisionReconciler struct { Client client.Client RevisionEngineFactory RevisionEngineFactory TrackingCache trackingCache - // track if we have queued up the reconciliation that detects eventual progress deadline issues - // keys is revision UUID, value is boolean - progressDeadlineCheckInFlight sync.Map } type trackingCache interface { @@ -79,25 +75,6 @@ func (c *ClusterExtensionRevisionReconciler) Reconcile(ctx context.Context, req reconciledRev := existingRev.DeepCopy() res, reconcileErr := c.reconcile(ctx, reconciledRev) - if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 { - cnd := meta.FindStatusCondition(reconciledRev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) - isStillProgressing := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason != ocv1.ReasonSucceeded - succeeded := meta.IsStatusConditionTrue(reconciledRev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) - // check if we reached the progress deadline only if the revision is still progressing and has not succeeded yet - if isStillProgressing && !succeeded { - timeout := time.Duration(pd) * time.Minute - if time.Since(existingRev.CreationTimestamp.Time) > timeout { - // progress deadline reached, reset any errors and stop reconciling this revision - markAsNotProgressing(reconciledRev, ocv1.ReasonProgressDeadlineExceeded, fmt.Sprintf("Revision has not rolled out for %d minutes.", pd)) - reconcileErr = nil - res = ctrl.Result{} - } else if _, found := c.progressDeadlineCheckInFlight.Load(existingRev.GetUID()); !found && reconcileErr == nil { - // if we haven't already queued up a reconcile to check for progress deadline, queue one up, but only once - c.progressDeadlineCheckInFlight.Store(existingRev.GetUID(), true) - res = ctrl.Result{RequeueAfter: timeout} - } - } - } // Do checks before any Update()s, as Update() may modify the resource structure! updateStatus := !equality.Semantic.DeepEqual(existingRev.Status, reconciledRev.Status) @@ -169,6 +146,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if rres != nil { // Log detailed reconcile reports only in debug mode (V(1)) to reduce verbosity. l.V(1).Info("reconcile report", "report", rres.String()) + updatePhaseStatuses(rev, rres) } setRetryingConditions(rev, err.Error()) return ctrl.Result{}, fmt.Errorf("revision reconcile: %v", err) @@ -178,14 +156,24 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev // TODO: report status, backoff? if verr := rres.GetValidationError(); verr != nil { l.Error(fmt.Errorf("%w", verr), "preflight validation failed, retrying after 10s") - setRetryingConditions(rev, fmt.Sprintf("revision validation error: %s", verr)) + updatePhaseStatuses(rev, rres) + markAsNotReady(rev, ocv1.ClusterExtensionRevisionReasonValidationFailed, fmt.Sprintf("revision validation error: %s", verr)) return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } for i, pres := range rres.GetPhases() { + phaseName := "" + if i < len(rev.Spec.Phases) { + phaseName = rev.Spec.Phases[i].Name + } + if phaseName == "" { + phaseName = fmt.Sprintf("%d", i) + } + if verr := pres.GetValidationError(); verr != nil { - l.Error(fmt.Errorf("%w", verr), "phase preflight validation failed, retrying after 10s", "phase", i) - setRetryingConditions(rev, fmt.Sprintf("phase %d validation error: %s", i, verr)) + l.Error(fmt.Errorf("%w", verr), "phase preflight validation failed, retrying after 10s", "phase", phaseName) + updatePhaseStatuses(rev, rres) + markAsNotReady(rev, ocv1.ClusterExtensionRevisionReasonValidationFailed, fmt.Sprintf("validation error in phase '%s'", phaseName)) return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } @@ -197,16 +185,20 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev } if len(collidingObjs) > 0 { - l.Error(fmt.Errorf("object collision detected"), "object collision, retrying after 10s", "phase", i, "collisions", collidingObjs) - setRetryingConditions(rev, fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n"))) + l.Error(fmt.Errorf("object collision detected"), "object collision, retrying after 10s", "phase", phaseName, "collisions", collidingObjs) + updatePhaseStatuses(rev, rres) + markAsNotReady(rev, ocv1.ClusterExtensionRevisionReasonObjectCollision, fmt.Sprintf("object collision in phase '%s'", phaseName)) return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } } - if !rres.InTransition() { - markAsProgressing(rev, ocv1.ReasonSucceeded, fmt.Sprintf("Revision %s has rolled out.", revVersion)) - } else { - markAsProgressing(rev, ocv1.ReasonRollingOut, fmt.Sprintf("Revision %s is rolling out.", revVersion)) + // Update phase statuses + updatePhaseStatuses(rev, rres) + + // Check if the revision has progressed (all objects transitioned to a newer revision) + if isRevisionProgressing(rres) { + markAsNotReady(rev, ocv1.ClusterExtensionRevisionReasonTransitioning, "Revision objects have transitioned to a newer revision") + return ctrl.Result{}, nil } //nolint:nestif @@ -226,18 +218,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev } } - markAsAvailable(rev, ocv1.ClusterExtensionRevisionReasonProbesSucceeded, "Objects are available and pass all probes.") - - // We'll probably only want to remove this once we are done updating the ClusterExtension conditions - // as its one of the interfaces between the revision and the extension. If we still have the Succeeded for now - // that's fine. - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeSucceeded, - Status: metav1.ConditionTrue, - Reason: ocv1.ReasonSucceeded, - Message: "Revision succeeded rolling out.", - ObservedGeneration: rev.Generation, - }) + markAsReady(rev, ocv1.ClusterExtensionRevisionReasonReady, "Revision has rolled out successfully") } else { var probeFailureMsgs []string for _, pres := range rres.GetPhases() { @@ -266,9 +247,9 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev } if len(probeFailureMsgs) > 0 { - markAsUnavailable(rev, ocv1.ClusterExtensionRevisionReasonProbeFailure, strings.Join(probeFailureMsgs, "\n")) + markAsNotReady(rev, ocv1.ClusterExtensionRevisionReasonProbeFailure, strings.Join(probeFailureMsgs, "\n")) } else { - markAsUnavailable(rev, ocv1.ReasonRollingOut, fmt.Sprintf("Revision %s is rolling out.", revVersion)) + markAsNotReady(rev, ocv1.ClusterExtensionRevisionReasonRollingOut, fmt.Sprintf("Revision %s is rolling out.", revVersion)) } } @@ -277,7 +258,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev func (c *ClusterExtensionRevisionReconciler) teardown(ctx context.Context, rev *ocv1.ClusterExtensionRevision) (ctrl.Result, error) { if err := c.TrackingCache.Free(ctx, rev); err != nil { - markAsAvailableUnknown(rev, ocv1.ClusterExtensionRevisionReasonReconciling, err.Error()) + markAsNotReady(rev, ocv1.ClusterExtensionRevisionReasonReconciling, err.Error()) return ctrl.Result{}, fmt.Errorf("error stopping informers: %v", err) } @@ -307,7 +288,7 @@ func (c *ClusterExtensionRevisionReconciler) SetupWithManager(mgr ctrl.Manager) if !rev.DeletionTimestamp.IsZero() { return true } - if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded { + if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ClusterExtensionRevisionReasonProgressDeadlineExceeded { return false } return true @@ -341,6 +322,22 @@ func (c *ClusterExtensionRevisionReconciler) establishWatch( } } + // Add the parent ClusterExtension's ServiceAccount to the watch list + ownerName, ok := rev.Labels[labels.OwnerNameKey] + if ok { + ext := &ocv1.ClusterExtension{} + if err := c.Client.Get(ctx, client.ObjectKey{Name: ownerName}, ext); err == nil { + // Watch the ServiceAccount referenced by the parent ClusterExtension + gvks.Insert(schema.GroupVersionKind{ + Group: corev1.GroupName, + Version: "v1", + Kind: "ServiceAccount", + }) + } + // If we can't get the ClusterExtension, continue without adding the ServiceAccount watch + // The revision will still watch its own objects + } + return c.TrackingCache.Watch(ctx, rev, gvks) } @@ -539,15 +536,12 @@ var ( ) func setRetryingConditions(cer *ocv1.ClusterExtensionRevision, message string) { - markAsProgressing(cer, ocv1.ClusterExtensionRevisionReasonRetrying, message) - if meta.FindStatusCondition(cer.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) != nil { - markAsAvailableUnknown(cer, ocv1.ClusterExtensionRevisionReasonReconciling, message) - } + markAsNotReady(cer, ocv1.ClusterExtensionRevisionReasonReconciling, message) } -func markAsProgressing(cer *ocv1.ClusterExtensionRevision, reason, message string) { - meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, +func markAsReady(cer *ocv1.ClusterExtensionRevision, reason, message string) bool { + return meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionTrue, Reason: reason, Message: message, @@ -555,9 +549,9 @@ func markAsProgressing(cer *ocv1.ClusterExtensionRevision, reason, message strin }) } -func markAsNotProgressing(cer *ocv1.ClusterExtensionRevision, reason, message string) bool { +func markAsNotReady(cer *ocv1.ClusterExtensionRevision, reason, message string) bool { return meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionFalse, Reason: reason, Message: message, @@ -565,38 +559,153 @@ func markAsNotProgressing(cer *ocv1.ClusterExtensionRevision, reason, message st }) } -func markAsAvailable(cer *ocv1.ClusterExtensionRevision, reason, message string) bool { - return meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionTrue, - Reason: reason, - Message: message, - ObservedGeneration: cer.Generation, - }) +func markAsArchived(cer *ocv1.ClusterExtensionRevision) bool { + const msg = "revision is archived" + return markAsNotReady(cer, ocv1.ClusterExtensionRevisionReasonArchived, msg) } -func markAsUnavailable(cer *ocv1.ClusterExtensionRevision, reason, message string) { - meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: reason, - Message: message, - ObservedGeneration: cer.Generation, - }) +// updatePhaseStatuses updates the phase statuses in the ClusterExtensionRevision status based on the reconciliation result. +// It always shows all phases from the spec, defaulting to Pending state for phases without available data. +// It only updates lastTransitionTime when the phase state, message, or failing probes actually change. +func updatePhaseStatuses(rev *ocv1.ClusterExtensionRevision, rres machinery.RevisionResult) { + newPhaseStatuses := make([]ocv1.ClusterExtensionRevisionPhaseStatus, 0, len(rev.Spec.Phases)) + now := metav1.Now() + + // Build a map of existing phase statuses for easy lookup + existingPhaseStatuses := make(map[string]ocv1.ClusterExtensionRevisionPhaseStatus) + for _, ps := range rev.Status.PhaseStatuses { + existingPhaseStatuses[ps.Name] = ps + } + + // Build a map of phase results by index for easy lookup + var phaseResults map[int]machinery.PhaseResult + if rres != nil { + phases := rres.GetPhases() + phaseResults = make(map[int]machinery.PhaseResult, len(phases)) + for i, pres := range phases { + phaseResults[i] = pres + } + } + + // Always iterate through all spec phases to ensure all are represented in status + for i, specPhase := range rev.Spec.Phases { + phaseName := specPhase.Name + if phaseName == "" { + phaseName = fmt.Sprintf("%d", i) + } + + phaseStatus := ocv1.ClusterExtensionRevisionPhaseStatus{ + Name: phaseName, + LastTransitionTime: now, + } + + // Check if we have result data for this phase + pres, hasResult := phaseResults[i] + if !hasResult { + // No data available yet - default to Pending + phaseStatus.State = ocv1.ClusterExtensionRevisionPhaseStatePending + phaseStatus.Message = "Phase is pending" + } else { + updatePhaseStatus(&phaseStatus, pres) + } + + // Check if this phase status has changed - if not, preserve the lastTransitionTime + if existing, found := existingPhaseStatuses[phaseName]; found { + if phaseStatusesEqual(existing, phaseStatus) { + phaseStatus.LastTransitionTime = existing.LastTransitionTime + } + } + + newPhaseStatuses = append(newPhaseStatuses, phaseStatus) + } + + rev.Status.PhaseStatuses = newPhaseStatuses } -func markAsAvailableUnknown(cer *ocv1.ClusterExtensionRevision, reason, message string) bool { - return meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionUnknown, - Reason: reason, - Message: message, - ObservedGeneration: cer.Generation, - }) +func updatePhaseStatus(phaseStatus *ocv1.ClusterExtensionRevisionPhaseStatus, pres machinery.PhaseResult) { + // Determine phase state from result + if verr := pres.GetValidationError(); verr != nil { + phaseStatus.State = ocv1.ClusterExtensionRevisionPhaseStateFailed + phaseStatus.Message = fmt.Sprintf("Validation error: %s", verr.Error()) + } else if c := countObjectsProgressed(pres); c > 0 { + // Phase has progressed to a newer revision - don't check probes + phaseStatus.State = ocv1.ClusterExtensionRevisionPhaseStateTransitioning + phaseStatus.Message = fmt.Sprintf("%d/%d objects have transitioned to newer revision", c, len(pres.GetObjects())) + } else if pres.IsComplete() { + phaseStatus.State = ocv1.ClusterExtensionRevisionPhaseStateApplied + phaseStatus.Message = "All objects applied and at the desired state" + } else { + phaseStatus.State = ocv1.ClusterExtensionRevisionPhaseStateProgressing + phaseStatus.Message = "Phase is progressing" + updatePhaseProbeFailures(phaseStatus, pres) + } } -func markAsArchived(cer *ocv1.ClusterExtensionRevision) bool { - const msg = "revision is archived" - updated := markAsNotProgressing(cer, ocv1.ClusterExtensionRevisionReasonArchived, msg) - return markAsAvailableUnknown(cer, ocv1.ClusterExtensionRevisionReasonArchived, msg) || updated +func updatePhaseProbeFailures(phaseStatus *ocv1.ClusterExtensionRevisionPhaseStatus, pres machinery.PhaseResult) { + // Extract failing probes and check for collisions + var failingProbes []ocv1.ClusterExtensionRevisionProbeFailure + for _, ores := range pres.GetObjects() { + if ores.Action() == machinery.ActionCollision { + phaseStatus.State = ocv1.ClusterExtensionRevisionPhaseStateFailed + phaseStatus.Message = fmt.Sprintf("Object collision: %s", ores.String()) + break + } + + pr := ores.ProbeResults()[boxcutter.ProgressProbeType] + if pr.Status != machinerytypes.ProbeStatusTrue { + obj := ores.Object() + gvk := obj.GetObjectKind().GroupVersionKind() + failingProbes = append(failingProbes, ocv1.ClusterExtensionRevisionProbeFailure{ + Kind: gvk.Kind, + Name: obj.GetName(), + Namespace: obj.GetNamespace(), + Message: strings.Join(pr.Messages, " and "), + }) + } + } + + if len(failingProbes) > 0 { + phaseStatus.FailingProbes = failingProbes + phaseStatus.Message = fmt.Sprintf("%d object(s) failing probes", len(failingProbes)) + } +} + +func countObjectsProgressed(phaseResult machinery.PhaseResult) int { + progressedCount := 0 + for _, obj := range phaseResult.GetObjects() { + if obj.Action() == machinery.ActionProgressed { + progressedCount++ + } + } + return progressedCount +} + +func isRevisionProgressing(revisionResult machinery.RevisionResult) bool { + progressedCount := 0 + for _, phaseResult := range revisionResult.GetPhases() { + if countObjectsProgressed(phaseResult) > 0 { + progressedCount++ + } + } + return progressedCount > 0 +} + +// phaseStatusesEqual compares two phase statuses ignoring lastTransitionTime. +func phaseStatusesEqual(a, b ocv1.ClusterExtensionRevisionPhaseStatus) bool { + if a.State != b.State || a.Message != b.Message { + return false + } + + if len(a.FailingProbes) != len(b.FailingProbes) { + return false + } + + // Compare failing probes - order matters + for i := range a.FailingProbes { + if a.FailingProbes[i] != b.FailingProbes[i] { + return false + } + } + + return true } diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go b/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go index f456976f4..19c4a8c29 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go @@ -66,7 +66,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t }, }, { - name: "Available condition is not updated on error if its not already set", + name: "Ready condition is not updated on error if its not already set", reconcilingRevisionName: clusterExtensionRevisionName, revisionResult: mockRevisionResult{}, revisionReconcileErr: errors.New("some error"), @@ -81,12 +81,15 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) - require.Nil(t, cond) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionFalse, cond.Status) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonReconciling, cond.Reason) + require.Equal(t, "some error", cond.Message) }, }, { - name: "Available condition is updated to Unknown on error if its been already set", + name: "Ready condition is updated to Unknown on error if its been already set", reconcilingRevisionName: clusterExtensionRevisionName, revisionResult: mockRevisionResult{}, revisionReconcileErr: errors.New("some error"), @@ -94,9 +97,9 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t ext := newTestClusterExtension() rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme) meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonProbesSucceeded, + Reason: ocv1.ClusterExtensionRevisionReasonReady, Message: "Revision 1.0.0 is rolled out.", ObservedGeneration: 1, }) @@ -108,16 +111,16 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) - require.Equal(t, metav1.ConditionUnknown, cond.Status) + require.Equal(t, metav1.ConditionFalse, cond.Status) require.Equal(t, ocv1.ClusterExtensionRevisionReasonReconciling, cond.Reason) require.Equal(t, "some error", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "set Available:False:RollingOut status condition during rollout when no probe failures are detected", + name: "set Ready:False:RollingOut status condition during rollout when no probe failures are detected", reconcilingRevisionName: clusterExtensionRevisionName, revisionResult: mockRevisionResult{}, existingObjs: func() []client.Object { @@ -131,16 +134,16 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) require.Equal(t, metav1.ConditionFalse, cond.Status) - require.Equal(t, ocv1.ReasonRollingOut, cond.Reason) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonRollingOut, cond.Reason) require.Equal(t, "Revision 1.0.0 is rolling out.", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "set Available:False:ProbeFailure condition when probe failures are detected and revision is in transition", + name: "set Ready:False:ProbeFailure condition when probe failures are detected and revision is in transition", reconcilingRevisionName: clusterExtensionRevisionName, revisionResult: mockRevisionResult{ inTransition: true, @@ -222,7 +225,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) require.Equal(t, metav1.ConditionFalse, cond.Status) require.Equal(t, ocv1.ClusterExtensionRevisionReasonProbeFailure, cond.Reason) @@ -231,7 +234,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t }, }, { - name: "set Available:False:ProbeFailure condition when probe failures are detected and revision is not in transition", + name: "set Ready:False:ProbeFailure condition when probe failures are detected and revision is not in transition", reconcilingRevisionName: clusterExtensionRevisionName, revisionResult: mockRevisionResult{ inTransition: false, @@ -313,7 +316,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) require.Equal(t, metav1.ConditionFalse, cond.Status) require.Equal(t, ocv1.ClusterExtensionRevisionReasonProbeFailure, cond.Reason) @@ -336,16 +339,16 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.TypeProgressing) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) - require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ClusterExtensionRevisionReasonRetrying, cond.Reason) + require.Equal(t, metav1.ConditionFalse, cond.Status) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonReconciling, cond.Reason) require.Equal(t, "some error", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "set Progressing:True:RollingOut condition while revision is transitioning", + name: "set Ready:False:RollingOut condition while revision is transitioning", revisionResult: mockRevisionResult{ inTransition: true, }, @@ -361,27 +364,27 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.TypeProgressing) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) - require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ReasonRollingOut, cond.Reason) + require.Equal(t, metav1.ConditionFalse, cond.Status) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonRollingOut, cond.Reason) require.Equal(t, "Revision 1.0.0 is rolling out.", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "set Progressing:True:Succeeded once transition rollout is finished", + name: "set Ready:True:Succeeded once transition rollout is finished", revisionResult: mockRevisionResult{ - inTransition: false, + isComplete: true, }, reconcilingRevisionName: clusterExtensionRevisionName, existingObjs: func() []client.Object { ext := newTestClusterExtension() rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme) meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ - Type: ocv1.TypeProgressing, + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionTrue, - Reason: ocv1.ReasonSucceeded, + Reason: ocv1.ClusterExtensionRevisionReasonReady, Message: "Revision 1.0.0 is rolling out.", ObservedGeneration: 1, }) @@ -393,16 +396,16 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.TypeProgressing) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ReasonSucceeded, cond.Reason) - require.Equal(t, "Revision 1.0.0 has rolled out.", cond.Message) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonReady, cond.Reason) + require.Equal(t, "Revision has rolled out successfully", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "set Available:True:ProbesSucceeded and Succeeded:True:Succeeded conditions on successful revision rollout", + name: "set Ready:True:Succeeded condition on successful revision rollout", revisionResult: mockRevisionResult{ isComplete: true, }, @@ -418,25 +421,11 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionReconciliation(t Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ClusterExtensionRevisionReasonProbesSucceeded, cond.Reason) - require.Equal(t, "Objects are available and pass all probes.", cond.Message) - require.Equal(t, int64(1), cond.ObservedGeneration) - - cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) - require.NotNil(t, cond) - require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ReasonSucceeded, cond.Reason) - require.Equal(t, "Revision 1.0.0 has rolled out.", cond.Message) - require.Equal(t, int64(1), cond.ObservedGeneration) - - cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) - require.NotNil(t, cond) - require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ReasonSucceeded, cond.Reason) - require.Equal(t, "Revision succeeded rolling out.", cond.Message) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonReady, cond.Reason) + require.Equal(t, "Revision has rolled out successfully", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, @@ -698,7 +687,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) { }, }, { - name: "set Available:Unknown:Reconciling and surface tracking cache cleanup errors when deleted", + name: "set Ready:Unknown:Reconciling and surface tracking cache cleanup errors when deleted", revisionResult: mockRevisionResult{}, existingObjs: func() []client.Object { ext := newTestClusterExtension() @@ -727,16 +716,16 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) { Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) - require.Equal(t, metav1.ConditionUnknown, cond.Status) + require.Equal(t, metav1.ConditionFalse, cond.Status) require.Equal(t, ocv1.ClusterExtensionRevisionReasonReconciling, cond.Reason) require.Equal(t, "some tracking cache cleanup error", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "set Available:Archived:Unknown and Progressing:False:Archived conditions when a revision is archived", + name: "set Ready:False:Archived condition when a revision is archived", revisionResult: mockRevisionResult{}, existingObjs: func() []client.Object { ext := newTestClusterExtension() @@ -760,14 +749,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) { Name: clusterExtensionRevisionName, }, rev) require.NoError(t, err) - cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) - require.NotNil(t, cond) - require.Equal(t, metav1.ConditionUnknown, cond.Status) - require.Equal(t, ocv1.ClusterExtensionRevisionReasonArchived, cond.Reason) - require.Equal(t, "revision is archived", cond.Message) - require.Equal(t, int64(1), cond.ObservedGeneration) - - cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeReady) require.NotNil(t, cond) require.Equal(t, metav1.ConditionFalse, cond.Status) require.Equal(t, ocv1.ClusterExtensionRevisionReasonArchived, cond.Reason) @@ -786,14 +768,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_Deletion(t *testing.T) { } rev1.Spec.LifecycleState = ocv1.ClusterExtensionRevisionLifecycleStateArchived meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionUnknown, - Reason: ocv1.ClusterExtensionRevisionReasonArchived, - Message: "revision is archived", - ObservedGeneration: rev1.Generation, - }) - meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Type: ocv1.ClusterExtensionRevisionTypeReady, Status: metav1.ConditionFalse, Reason: ocv1.ClusterExtensionRevisionReasonArchived, Message: "revision is archived", @@ -875,90 +850,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_ProgressDeadline(t *testi validate func(*testing.T, client.Client) reconcileErr error reconcileResult ctrl.Result - }{ - { - name: "progressing set to false when progress deadline is exceeded", - existingObjs: func() []client.Object { - ext := newTestClusterExtension() - rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme) - rev1.Spec.ProgressDeadlineMinutes = 1 - rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-61 * time.Second)) - return []client.Object{rev1, ext} - }, - revisionResult: &mockRevisionResult{ - inTransition: true, - }, - validate: func(t *testing.T, c client.Client) { - rev := &ocv1.ClusterExtensionRevision{} - err := c.Get(t.Context(), client.ObjectKey{ - Name: clusterExtensionRevisionName, - }, rev) - require.NoError(t, err) - cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) - require.Equal(t, metav1.ConditionFalse, cnd.Status) - require.Equal(t, ocv1.ReasonProgressDeadlineExceeded, cnd.Reason) - }, - }, - { - name: "requeue after progressDeadline time for final progression deadline check", - existingObjs: func() []client.Object { - ext := newTestClusterExtension() - rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme) - rev1.Spec.ProgressDeadlineMinutes = 1 - rev1.CreationTimestamp = metav1.NewTime(time.Now()) - return []client.Object{rev1, ext} - }, - revisionResult: &mockRevisionResult{ - inTransition: true, - }, - reconcileResult: ctrl.Result{RequeueAfter: 1 * time.Minute}, - validate: func(t *testing.T, c client.Client) { - rev := &ocv1.ClusterExtensionRevision{} - err := c.Get(t.Context(), client.ObjectKey{ - Name: clusterExtensionRevisionName, - }, rev) - require.NoError(t, err) - cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) - require.Equal(t, metav1.ConditionTrue, cnd.Status) - require.Equal(t, ocv1.ReasonRollingOut, cnd.Reason) - }, - }, - { - name: "no progression deadline checks on revision recovery", - existingObjs: func() []client.Object { - ext := newTestClusterExtension() - rev1 := newTestClusterExtensionRevision(t, clusterExtensionRevisionName, ext, testScheme) - rev1.Spec.ProgressDeadlineMinutes = 1 - rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-2 * time.Minute)) - meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ReasonSucceeded, - ObservedGeneration: rev1.Generation, - }) - meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeSucceeded, - Status: metav1.ConditionTrue, - Reason: ocv1.ReasonSucceeded, - ObservedGeneration: rev1.Generation, - }) - return []client.Object{rev1, ext} - }, - revisionResult: &mockRevisionResult{ - inTransition: true, - }, - validate: func(t *testing.T, c client.Client) { - rev := &ocv1.ClusterExtensionRevision{} - err := c.Get(t.Context(), client.ObjectKey{ - Name: clusterExtensionRevisionName, - }, rev) - require.NoError(t, err) - cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) - require.Equal(t, metav1.ConditionTrue, cnd.Status) - require.Equal(t, ocv1.ReasonRollingOut, cnd.Reason) - }, - }, - } { + }{} { t.Run(tc.name, func(t *testing.T) { // create extension and cluster extension testClient := fake.NewClientBuilder(). diff --git a/internal/operator-controller/controllers/common_controller.go b/internal/operator-controller/controllers/common_controller.go index cb6834c6b..fe3586026 100644 --- a/internal/operator-controller/controllers/common_controller.go +++ b/internal/operator-controller/controllers/common_controller.go @@ -53,20 +53,20 @@ func SetStatusCondition(conditions *[]metav1.Condition, condition metav1.Conditi } // setInstalledStatusFromRevisionStates sets the installed status based on the given installedBundle. -func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionStates *RevisionStates) { +func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionStates RevisionStates) { // Nothing is installed - if revisionStates.Installed == nil { + if revisionStates.Installed() == nil { setInstallStatus(ext, nil) - reason := determineFailureReason(revisionStates.RollingOut) + reason := determineFailureReason(revisionStates.RollingOut()) setInstalledStatusConditionFalse(ext, reason, "No bundle installed") return } // Something is installed installStatus := &ocv1.ClusterExtensionInstallStatus{ - Bundle: revisionStates.Installed.BundleMetadata, + Bundle: revisionStates.Installed().BundleMetadata, } setInstallStatus(ext, installStatus) - setInstalledStatusConditionSuccess(ext, fmt.Sprintf("Installed bundle %s successfully", revisionStates.Installed.Image)) + setInstalledStatusConditionSuccess(ext, fmt.Sprintf("Installed bundle %s successfully", revisionStates.Installed().Image)) } // determineFailureReason determines the appropriate reason for the Installed condition @@ -93,12 +93,12 @@ func determineFailureReason(rollingRevisions []*RevisionMetadata) string { return ocv1.ReasonFailed } - // Check if the LATEST rolling revision indicates an error (Retrying reason) + // Check if the LATEST rolling revision indicates an error // Latest revision is the last element in the array (sorted ascending by Spec.Revision) latestRevision := rollingRevisions[len(rollingRevisions)-1] - progressingCond := apimeta.FindStatusCondition(latestRevision.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) - if progressingCond != nil && progressingCond.Reason == string(ocv1.ClusterExtensionRevisionReasonRetrying) { - // Retrying indicates an error occurred (config, apply, validation, etc.) + readyCond := apimeta.FindStatusCondition(latestRevision.Conditions, ocv1.ClusterExtensionRevisionTypeReady) + if readyCond != nil && readyCond.Status == "False" && readyCond.Reason == string(ocv1.ClusterExtensionRevisionReasonReconciling) { + // Reconciling indicates an error occurred (config, apply, validation, etc.) // Use Failed for semantic correctness: installation failed due to error return ocv1.ReasonFailed } diff --git a/internal/operator-controller/controllers/common_controller_test.go b/internal/operator-controller/controllers/common_controller_test.go index 3cc757543..578ae3e31 100644 --- a/internal/operator-controller/controllers/common_controller_test.go +++ b/internal/operator-controller/controllers/common_controller_test.go @@ -256,15 +256,12 @@ func TestSetStatusConditionWrapper(t *testing.T) { func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T) { tests := []struct { name string - revisionStates *RevisionStates + revisionStates RevisionStates expectedInstalledCond metav1.Condition }{ { - name: "no revisions at all - uses Failed", - revisionStates: &RevisionStates{ - Installed: nil, - RollingOut: nil, - }, + name: "no revisions at all - uses Failed", + revisionStates: RevisionStates{}, expectedInstalledCond: metav1.Condition{ Type: ocv1.TypeInstalled, Status: metav1.ConditionFalse, @@ -273,18 +270,16 @@ func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T }, { name: "rolling revision with error (Retrying) - uses Failed", - revisionStates: &RevisionStates{ - Installed: nil, - RollingOut: []*RevisionMetadata{ - { - RevisionName: "rev-1", - Conditions: []metav1.Condition{ - { - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonRetrying, - Message: "some error occurred", - }, + revisionStates: RevisionStates{ + { + RevisionName: "rev-1", + State: RevisionStateRollingOut, + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeReady, + Status: metav1.ConditionFalse, + Reason: ocv1.ClusterExtensionRevisionReasonReconciling, + Message: "some error occurred", }, }, }, @@ -297,29 +292,28 @@ func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T }, { name: "multiple rolling revisions with one Retrying - uses Failed", - revisionStates: &RevisionStates{ - Installed: nil, - RollingOut: []*RevisionMetadata{ - { - RevisionName: "rev-1", - Conditions: []metav1.Condition{ - { - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ReasonRollingOut, - Message: "Revision is rolling out", - }, + revisionStates: RevisionStates{ + { + RevisionName: "rev-1", + State: RevisionStateRollingOut, + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeReady, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonRollingOut, + Message: "Revision is rolling out", }, }, - { - RevisionName: "rev-2", - Conditions: []metav1.Condition{ - { - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonRetrying, - Message: "validation error occurred", - }, + }, + { + RevisionName: "rev-2", + State: RevisionStateRollingOut, + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeReady, + Status: metav1.ConditionFalse, + Reason: ocv1.ClusterExtensionRevisionReasonReconciling, + Message: "validation error occurred", }, }, }, @@ -332,18 +326,16 @@ func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T }, { name: "rolling revision with RollingOut reason - uses Absent", - revisionStates: &RevisionStates{ - Installed: nil, - RollingOut: []*RevisionMetadata{ - { - RevisionName: "rev-1", - Conditions: []metav1.Condition{ - { - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ReasonRollingOut, - Message: "Revision is rolling out", - }, + revisionStates: RevisionStates{ + { + RevisionName: "rev-1", + State: RevisionStateRollingOut, + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeReady, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonRollingOut, + Message: "Revision is rolling out", }, }, }, @@ -356,29 +348,28 @@ func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T }, { name: "old revision with Retrying superseded by latest healthy - uses Absent", - revisionStates: &RevisionStates{ - Installed: nil, - RollingOut: []*RevisionMetadata{ - { - RevisionName: "rev-1", - Conditions: []metav1.Condition{ - { - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonRetrying, - Message: "old error that was superseded", - }, + revisionStates: RevisionStates{ + { + RevisionName: "rev-1", + State: RevisionStateRollingOut, + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeReady, + Status: metav1.ConditionFalse, + Reason: ocv1.ClusterExtensionRevisionReasonReconciling, + Message: "old error that was superseded", }, }, - { - RevisionName: "rev-2", - Conditions: []metav1.Condition{ - { - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ReasonRollingOut, - Message: "Latest revision is rolling out healthy", - }, + }, + { + RevisionName: "rev-2", + State: RevisionStateRollingOut, + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeReady, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonRollingOut, + Message: "Latest revision is rolling out healthy", }, }, }, diff --git a/internal/operator-controller/controllers/suite_test.go b/internal/operator-controller/controllers/suite_test.go index 57305a75f..6c32dc148 100644 --- a/internal/operator-controller/controllers/suite_test.go +++ b/internal/operator-controller/controllers/suite_test.go @@ -55,11 +55,11 @@ func newClient(t *testing.T) client.Client { var _ controllers.RevisionStatesGetter = (*MockRevisionStatesGetter)(nil) type MockRevisionStatesGetter struct { - *controllers.RevisionStates + controllers.RevisionStates Err error } -func (m *MockRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (*controllers.RevisionStates, error) { +func (m *MockRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (controllers.RevisionStates, error) { if m.Err != nil { return nil, m.Err } @@ -94,7 +94,7 @@ func newClientAndReconciler(t *testing.T, opts ...reconcilerOption) (client.Clie d := &deps{ RevisionStatesGetter: &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{}, + RevisionStates: controllers.RevisionStates{}, }, Finalizers: crfinalizer.NewFinalizers(), } diff --git a/internal/operator-controller/labels/labels.go b/internal/operator-controller/labels/labels.go index 16f45ecbb..254ab0bd3 100644 --- a/internal/operator-controller/labels/labels.go +++ b/internal/operator-controller/labels/labels.go @@ -44,4 +44,10 @@ const ( // that were created during migration from Helm releases. This label is used // to distinguish migrated revisions from those created by normal Boxcutter operation. MigratedFromHelmKey = "olm.operatorframework.io/migrated-from-helm" + + // ClusterExtensionGenerationKey is the annotation key used to record the + // generation of the ClusterExtension at the time the ClusterExtensionRevision + // was created. This tracks which version of the ClusterExtension spec led to + // the creation of this revision. + ClusterExtensionGenerationKey = "olm.operatorframework.io/cluster-extension-generation" ) diff --git a/manifests/experimental-e2e.yaml b/manifests/experimental-e2e.yaml index 6536baf93..ad9f28812 100644 --- a/manifests/experimental-e2e.yaml +++ b/manifests/experimental-e2e.yaml @@ -628,11 +628,11 @@ spec: scope: Cluster versions: - additionalPrinterColumns: - - jsonPath: .status.conditions[?(@.type=='Available')].status - name: Available + - jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready type: string - - jsonPath: .status.conditions[?(@.type=='Progressing')].status - name: Progressing + - jsonPath: .status.conditions[?(@.type=='Ready')].reason + name: Reason type: string - jsonPath: .metadata.creationTimestamp name: Age @@ -778,16 +778,6 @@ spec: x-kubernetes-validations: - message: phases is immutable rule: self == oldSelf || oldSelf.size() == 0 - progressDeadlineMinutes: - description: |- - progressDeadlineMinutes is an optional field that defines the maximum period - of time in minutes after which an installation should be considered failed and - require manual intervention. This functionality is disabled when no value - is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). - format: int32 - maximum: 720 - minimum: 10 - type: integer revision: description: |- revision is a required, immutable sequence number representing a specific revision @@ -814,22 +804,16 @@ spec: conditions is an optional list of status conditions describing the state of the ClusterExtensionRevision. - The Progressing condition represents whether the revision is actively rolling out: - - When status is True and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and is in transition. - - When status is True and reason is Retrying, the ClusterExtensionRevision has encountered an error that could be resolved on subsequent reconciliation attempts. - - When status is True and reason is Succeeded, the ClusterExtensionRevision has reached the desired state. - - When status is False and reason is Blocked, the ClusterExtensionRevision has encountered an error that requires manual intervention for recovery. + The Ready condition represents whether the revision has been successfully rolled out and is ready: + - When status is True and reason is Ready, all ClusterExtensionRevision resources have been applied and all progression probes are successful. + - When status is False and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and objects are being applied. + - When status is False and reason is Reconciling, the ClusterExtensionRevision is being reconciled or retrying after an error. + - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes. + - When status is False and reason is ValidationFailed, the revision failed preflight validation checks. + - When status is False and reason is ObjectCollision, the revision encountered object ownership collisions. + - When status is False and reason is ProgressDeadlineExceeded, the revision has not completed within the specified progress deadline. + - When status is False and reason is Transitioning, the revision's objects are being transitioned to a newer revision. - When status is False and reason is Archived, the ClusterExtensionRevision is archived and not being actively reconciled. - - The Available condition represents whether the revision has been successfully rolled out and is available: - - When status is True and reason is ProbesSucceeded, the ClusterExtensionRevision has been successfully rolled out and all objects pass their readiness probes. - - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes during rollout. - - When status is Unknown and reason is Reconciling, the ClusterExtensionRevision has encountered an error that prevented it from observing the probes. - - When status is Unknown and reason is Archived, the ClusterExtensionRevision has been archived and its objects have been torn down. - - When status is Unknown and reason is Migrated, the ClusterExtensionRevision was migrated from an existing release and object status probe results have not yet been observed. - - The Succeeded condition represents whether the revision has successfully completed its rollout: - - When status is True and reason is Succeeded, the ClusterExtensionRevision has successfully completed its rollout. This condition is set once and persists even if the revision later becomes unavailable. items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -888,6 +872,77 @@ spec: x-kubernetes-list-map-keys: - type x-kubernetes-list-type: map + phaseStatuses: + description: |- + phaseStatuses provides detailed status information for each phase in the revision. + Each phase status includes the phase name, current state, any error messages, + and details about failing probes if applicable. + items: + description: ClusterExtensionRevisionPhaseStatus describes the status + of a single phase. + properties: + failingProbes: + description: |- + failingProbes lists objects in this phase that are failing their readiness probes. + Only populated when state is Progressing and probes are failing. + items: + description: ClusterExtensionRevisionProbeFailure describes + a failing probe for an object in a phase. + properties: + kind: + description: kind is the Kind of the object failing probes. + type: string + message: + description: message contains the joined probe failure + messages. + type: string + name: + description: name is the name of the object failing probes. + type: string + namespace: + description: |- + namespace is the namespace of the object failing probes. + Empty for cluster-scoped objects. + type: string + required: + - kind + - message + - name + type: object + type: array + x-kubernetes-list-type: atomic + lastTransitionTime: + description: |- + lastTransitionTime is the last time the phase state changed. + This only updates when the state, message, or failingProbes change. + format: date-time + type: string + message: + description: |- + message provides additional context about the phase state. + This may include error messages for failed phases. + type: string + name: + description: name is the name of the phase. + type: string + state: + description: state represents the current state of the phase. + enum: + - Applied + - Progressing + - Failed + - Pending + - Transitioning + type: string + required: + - lastTransitionTime + - name + - state + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object type: object served: true diff --git a/manifests/experimental.yaml b/manifests/experimental.yaml index 0b750139d..dcb18419b 100644 --- a/manifests/experimental.yaml +++ b/manifests/experimental.yaml @@ -589,11 +589,11 @@ spec: scope: Cluster versions: - additionalPrinterColumns: - - jsonPath: .status.conditions[?(@.type=='Available')].status - name: Available + - jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready type: string - - jsonPath: .status.conditions[?(@.type=='Progressing')].status - name: Progressing + - jsonPath: .status.conditions[?(@.type=='Ready')].reason + name: Reason type: string - jsonPath: .metadata.creationTimestamp name: Age @@ -739,16 +739,6 @@ spec: x-kubernetes-validations: - message: phases is immutable rule: self == oldSelf || oldSelf.size() == 0 - progressDeadlineMinutes: - description: |- - progressDeadlineMinutes is an optional field that defines the maximum period - of time in minutes after which an installation should be considered failed and - require manual intervention. This functionality is disabled when no value - is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). - format: int32 - maximum: 720 - minimum: 10 - type: integer revision: description: |- revision is a required, immutable sequence number representing a specific revision @@ -775,22 +765,16 @@ spec: conditions is an optional list of status conditions describing the state of the ClusterExtensionRevision. - The Progressing condition represents whether the revision is actively rolling out: - - When status is True and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and is in transition. - - When status is True and reason is Retrying, the ClusterExtensionRevision has encountered an error that could be resolved on subsequent reconciliation attempts. - - When status is True and reason is Succeeded, the ClusterExtensionRevision has reached the desired state. - - When status is False and reason is Blocked, the ClusterExtensionRevision has encountered an error that requires manual intervention for recovery. + The Ready condition represents whether the revision has been successfully rolled out and is ready: + - When status is True and reason is Ready, all ClusterExtensionRevision resources have been applied and all progression probes are successful. + - When status is False and reason is RollingOut, the ClusterExtensionRevision rollout is actively making progress and objects are being applied. + - When status is False and reason is Reconciling, the ClusterExtensionRevision is being reconciled or retrying after an error. + - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes. + - When status is False and reason is ValidationFailed, the revision failed preflight validation checks. + - When status is False and reason is ObjectCollision, the revision encountered object ownership collisions. + - When status is False and reason is ProgressDeadlineExceeded, the revision has not completed within the specified progress deadline. + - When status is False and reason is Transitioning, the revision's objects are being transitioned to a newer revision. - When status is False and reason is Archived, the ClusterExtensionRevision is archived and not being actively reconciled. - - The Available condition represents whether the revision has been successfully rolled out and is available: - - When status is True and reason is ProbesSucceeded, the ClusterExtensionRevision has been successfully rolled out and all objects pass their readiness probes. - - When status is False and reason is ProbeFailure, one or more objects are failing their readiness probes during rollout. - - When status is Unknown and reason is Reconciling, the ClusterExtensionRevision has encountered an error that prevented it from observing the probes. - - When status is Unknown and reason is Archived, the ClusterExtensionRevision has been archived and its objects have been torn down. - - When status is Unknown and reason is Migrated, the ClusterExtensionRevision was migrated from an existing release and object status probe results have not yet been observed. - - The Succeeded condition represents whether the revision has successfully completed its rollout: - - When status is True and reason is Succeeded, the ClusterExtensionRevision has successfully completed its rollout. This condition is set once and persists even if the revision later becomes unavailable. items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -849,6 +833,77 @@ spec: x-kubernetes-list-map-keys: - type x-kubernetes-list-type: map + phaseStatuses: + description: |- + phaseStatuses provides detailed status information for each phase in the revision. + Each phase status includes the phase name, current state, any error messages, + and details about failing probes if applicable. + items: + description: ClusterExtensionRevisionPhaseStatus describes the status + of a single phase. + properties: + failingProbes: + description: |- + failingProbes lists objects in this phase that are failing their readiness probes. + Only populated when state is Progressing and probes are failing. + items: + description: ClusterExtensionRevisionProbeFailure describes + a failing probe for an object in a phase. + properties: + kind: + description: kind is the Kind of the object failing probes. + type: string + message: + description: message contains the joined probe failure + messages. + type: string + name: + description: name is the name of the object failing probes. + type: string + namespace: + description: |- + namespace is the namespace of the object failing probes. + Empty for cluster-scoped objects. + type: string + required: + - kind + - message + - name + type: object + type: array + x-kubernetes-list-type: atomic + lastTransitionTime: + description: |- + lastTransitionTime is the last time the phase state changed. + This only updates when the state, message, or failingProbes change. + format: date-time + type: string + message: + description: |- + message provides additional context about the phase state. + This may include error messages for failed phases. + type: string + name: + description: name is the name of the phase. + type: string + state: + description: state represents the current state of the phase. + enum: + - Applied + - Progressing + - Failed + - Pending + - Transitioning + type: string + required: + - lastTransitionTime + - name + - state + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object type: object served: true diff --git a/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml b/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml index 012afbe83..82d01105b 100644 --- a/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml +++ b/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml @@ -17,8 +17,12 @@ entries: - name: test-operator.1.0.0 - name: test-operator.1.0.1 replaces: test-operator.1.0.0 - - name: test-operator.1.2.0 + - name: test-operator.1.0.2 replaces: test-operator.1.0.1 + - name: test-operator.1.0.3 + replaces: test-operator.1.0.2 + - name: test-operator.1.2.0 + replaces: test-operator.1.0.3 --- schema: olm.bundle name: test-operator.1.0.0