Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion pkg/interruptionevent/draincordon/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,18 @@ func (h *Handler) HandleEvent(drainEvent *monitor.InterruptionEvent) error {
}

if drainEvent.PreDrainTask != nil {
h.commonHandler.RunPreDrainTask(nodeName, drainEvent)
if err := h.commonHandler.RunPreDrainTask(nodeName, drainEvent); err != nil {
log.Err(err).Str("nodeName", nodeName).Msg("Pre-drain task failed; aborting to allow SQS retry")
h.commonHandler.InterruptionEventStore.CancelInterruptionEvent(drainEvent.EventID)

// If the node is missing and the user opted for DeleteSqsMsgIfNodeNotFound, run the post-drain task (when one is set) to delete the SQS message and return nil instead of the error
if !nodeFound && h.commonHandler.NthConfig.DeleteSqsMsgIfNodeNotFound && drainEvent.PostDrainTask != nil {
h.commonHandler.RunPostDrainTask(nodeName, drainEvent)
return nil
}

return err
}
}

podNameList, err := h.commonHandler.Node.FetchPodNameList(nodeName)
Expand Down
3 changes: 2 additions & 1 deletion pkg/interruptionevent/internal/common/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func (h *Handler) GetNodeName(drainEvent *monitor.InterruptionEvent) (string, er
return nodeName, nil
}

func (h *Handler) RunPreDrainTask(nodeName string, drainEvent *monitor.InterruptionEvent) {
func (h *Handler) RunPreDrainTask(nodeName string, drainEvent *monitor.InterruptionEvent) error {
err := drainEvent.PreDrainTask(*drainEvent, h.Node)
if err != nil {
log.Err(err).Msg("There was a problem executing the pre-drain task")
Expand All @@ -53,6 +53,7 @@ func (h *Handler) RunPreDrainTask(nodeName string, drainEvent *monitor.Interrupt
h.Recorder.Emit(nodeName, observability.Normal, observability.PreDrainReason, observability.PreDrainMsg)
}
h.Metrics.NodeActionsInc("pre-drain", nodeName, drainEvent.EventID, err)
return err
}

func (h *Handler) RunCancelDrainTask(nodeName string, drainEvent *monitor.InterruptionEvent) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/monitor/sqsevent/spot-itn-event.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func (m SQSMonitor) spotITNTerminationToInterruptionEvent(event *EventBridgeEven
if err != nil {
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.SpotInterruptionTaint, interruptionEvent.EventID)
}
return nil
return err
}
return &interruptionEvent, nil
}
Loading