Skip to content

Commit

Permalink
Add events to the maintenance process
Browse files Browse the repository at this point in the history
The events help to track the maintenance process by looking at the NodeMaintenance (nm) and node events.
  • Loading branch information
razo7 committed Jan 28, 2024
1 parent 0dc7c17 commit add8cb5
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 7 deletions.
4 changes: 3 additions & 1 deletion controllers/controller_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,13 @@ var _ = BeforeSuite(func() {
Expect(k8sClient).NotTo(BeNil())

mockManager, _ := lease.NewManager(k8sClient, "")
fakeRecorder = record.NewFakeRecorder(20)
// Create a ReconcileNodeMaintenance object with the scheme and fake client
r = &NodeMaintenanceReconciler{
Client: k8sClient,
Scheme: scheme.Scheme,
LeaseManager: &mockLeaseManager{mockManager},
Recorder: fakeRecorder,
logger: ctrl.Log.WithName("unit test"),
}
Expect(initDrainer(r, cfg)).To(Succeed())
Expand All @@ -111,4 +113,4 @@ var _ = AfterSuite(func() {
By("tearing down the test environment")
cancel()
Expect(testEnv.Stop()).To(Succeed())
})
})
17 changes: 17 additions & 0 deletions controllers/nodemaintenance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
"k8s.io/kubectl/pkg/cmd/util"
"k8s.io/kubectl/pkg/drain"
Expand All @@ -42,6 +43,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

nodemaintenancev1beta1 "github.com/medik8s/node-maintenance-operator/api/v1beta1"
"github.com/medik8s/node-maintenance-operator/pkg/utils"
)

const (
Expand All @@ -60,6 +62,7 @@ type NodeMaintenanceReconciler struct {
client.Client
Scheme *runtime.Scheme
LeaseManager lease.Manager
Recorder record.EventRecorder
drainer *drain.Helper
logger logr.Logger
}
Expand Down Expand Up @@ -116,6 +119,8 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if err := r.Client.Update(context.TODO(), nm); err != nil {
return r.onReconcileError(nm, err)
}
// begin maintenance on adding finalizer
utils.NormalEvent(r.Recorder, nm, utils.EventReasonBeginMaintenance, utils.EventMessageBeginMaintenance)
}
} else {
r.logger.Info("Deletion timestamp not zero")
Expand All @@ -135,6 +140,8 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if err := r.Client.Update(context.Background(), nm); err != nil {
return r.onReconcileError(nm, err)
}
// end maintenance on removing finalizer, taints, and node is already uncordoned
utils.NormalEvent(r.Recorder, nm, utils.EventReasonRemovedMaintenance, utils.EventMessageRemovedMaintenance)
}
return reconcile.Result{}, nil
}
Expand Down Expand Up @@ -166,6 +173,8 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if err != nil {
return r.onReconcileError(nm, fmt.Errorf("failed to uncordon upon failure to obtain owned lease : %v ", err))
}
// maintenance has failed - node was uncordon and under maintenance mode
utils.WarningEvent(r.Recorder, nm, utils.EventReasonFailedMaintenance, utils.EventMessageFailedMaintenance)
nm.Status.Phase = nodemaintenancev1beta1.MaintenanceFailed
}
return r.onReconcileError(nm, fmt.Errorf("failed to extend lease owned by us : %v errorOnLeaseCount %d", err, nm.Status.ErrorOnLeaseCount))
Expand All @@ -175,6 +184,8 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
return r.onReconcileError(nm, err)
} else {
if nm.Status.Phase != nodemaintenancev1beta1.MaintenanceRunning || nm.Status.ErrorOnLeaseCount != 0 {
// Another chance to evict pods - clear ErrorOnLeaseCount and try again to put the node under maintenance
utils.NormalEvent(r.Recorder, nm, utils.EventReasonEvictingPods, utils.EventMessageEvictingPods)
nm.Status.Phase = nodemaintenancev1beta1.MaintenanceRunning
nm.Status.ErrorOnLeaseCount = 0

Expand All @@ -199,9 +210,13 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ

if err = drain.RunNodeDrain(r.drainer, nodeName); err != nil {
r.logger.Info("Not all pods evicted", "nodeName", nodeName, "error", err)
// maintenance in progres - some pods haven't been evicted
utils.NormalEvent(r.Recorder, nm, utils.EventReasonEvictingPods, utils.EventMessageEvictingPods)
waitOnReconcile := waitDurationOnDrainError
return r.onReconcileErrorWithRequeue(nm, err, &waitOnReconcile)
} else if nm.Status.Phase != nodemaintenancev1beta1.MaintenanceSucceeded {
// maintenance has completed - node is under maintenance mode
utils.NormalEvent(r.Recorder, nm, utils.EventReasonSucceedMaintenance, utils.EventMessageSucceedMaintenance)
setLastUpdate(nm)
}

Expand Down Expand Up @@ -359,6 +374,8 @@ func (r *NodeMaintenanceReconciler) stopNodeMaintenanceImp(ctx context.Context,
return err
}

// end maintenance on removing finalizer - taints have been removed and node was uncordoned
utils.NormalEvent(r.Recorder, node, utils.EventReasonUncordonNode, utils.EventMessageUncordonNode)
if err := r.LeaseManager.InvalidateLease(ctx, node); err != nil {
return err
}
Expand Down
66 changes: 60 additions & 6 deletions controllers/nodemaintenance_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package controllers

import (
"context"
"fmt"
"reflect"
"time"

Expand All @@ -18,6 +19,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

nodemaintenanceapi "github.com/medik8s/node-maintenance-operator/api/v1beta1"
utils "github.com/medik8s/node-maintenance-operator/pkg/utils"
)

const (
Expand Down Expand Up @@ -47,9 +49,6 @@ var _ = Describe("Node Maintenance", func() {
}
nodeOne = getNode(taintedNodeName)
})
JustBeforeEach(func() {
sleepOneSec()
})

Context("Taint functioninality test", func() {
// var nm *nodemaintenanceapi.NodeMaintenance
Expand Down Expand Up @@ -99,6 +98,7 @@ var _ = Describe("Node Maintenance", func() {
Expect(k8sClient.Create(context.Background(), podTwo)).To(Succeed())
DeferCleanup(cleanupPod, context.Background(), podOne)
DeferCleanup(cleanupPod, context.Background(), podTwo)
DeferCleanup(clearEvents)
})

When("nm CR is valid", func() {
Expand All @@ -124,6 +124,7 @@ var _ = Describe("Node Maintenance", func() {
//Re-fetch node after nm CR deletion
Expect(k8sClient.Delete(context.Background(), nm)).To(Succeed())
sleepOneSec()
verifyEvent(corev1.EventTypeNormal, utils.EventReasonRemovedMaintenance, utils.EventMessageRemovedMaintenance)

Expect(k8sClient.Get(context.Background(), client.ObjectKey{Name: nm.Spec.NodeName}, node)).NotTo(HaveOccurred())
_, exist := node.Labels[labels.ExcludeFromRemediation]
Expand All @@ -148,7 +149,7 @@ func sleepOneSec() {
time.Sleep(1 * time.Second)
}

func getNMAfterReconcile(nm *nodemaintenanceapi.NodeMaintenance) *nodemaintenanceapi.NodeMaintenance {
func getNMAfterTimeout(nm *nodemaintenanceapi.NodeMaintenance) *nodemaintenanceapi.NodeMaintenance {
maintenance := &nodemaintenanceapi.NodeMaintenance{}
Consistently(func() error {
return k8sClient.Get(context.Background(), client.ObjectKeyFromObject(nm), maintenance)
Expand All @@ -157,15 +158,18 @@ func getNMAfterReconcile(nm *nodemaintenanceapi.NodeMaintenance) *nodemaintenanc
}
// checkSuccesfulReconcile asserts that the fetched NodeMaintenance has phase
// MaintenanceSucceeded and DrainProgress 100, and that a Normal
// SucceedMaintenance event was recorded.
func checkSuccesfulReconcile(nm *nodemaintenanceapi.NodeMaintenance) {
By("check success Phase and drain progress was completed")
// NOTE(review): the two assignments below look like the pre- and post-rename
// forms of one statement as shown by the diff view (getNMAfterReconcile was
// renamed to getNMAfterTimeout) - confirm only the latter exists in the file.
maintenance := getNMAfterReconcile(nm)
maintenance := getNMAfterTimeout(nm)
Expect(maintenance.Status.Phase).To(Equal(nodemaintenanceapi.MaintenanceSucceeded))
Expect(maintenance.Status.DrainProgress).To(Equal(100))
// verifyEvent consumes the matching event from the fake recorder.
verifyEvent(corev1.EventTypeNormal, utils.EventReasonSucceedMaintenance, utils.EventMessageSucceedMaintenance)
}

// checkFailedReconcile asserts that the fetched NodeMaintenance has a non-empty
// Status.LastError, that a Normal BeginMaintenance event was recorded, and that
// no Normal EvictingPods event was recorded.
func checkFailedReconcile(nm *nodemaintenanceapi.NodeMaintenance) {
By("check whether node status LastError was updated")
// NOTE(review): the two assignments below look like the pre- and post-rename
// forms of one statement as shown by the diff view (getNMAfterReconcile was
// renamed to getNMAfterTimeout) - confirm only the latter exists in the file.
maintenance := getNMAfterReconcile(nm)
maintenance := getNMAfterTimeout(nm)
Expect(maintenance.Status.LastError).NotTo(Equal(""))
verifyEvent(corev1.EventTypeNormal, utils.EventReasonBeginMaintenance, utils.EventMessageBeginMaintenance)
verifyNoEvent(corev1.EventTypeNormal, utils.EventReasonEvictingPods, utils.EventMessageEvictingPods)
}

func taintExist(node *corev1.Node, key string, effect corev1.TaintEffect) bool {
Expand Down Expand Up @@ -259,6 +263,56 @@ func getTestNM(crName, nodeName string) *nodemaintenanceapi.NodeMaintenance {
}
}

// verifyEvent fails the calling spec unless an event with the given type,
// reason and message is currently queued in the fake recorder.
// The offset of 1 attributes a failure to the caller's line.
func verifyEvent(eventType, eventReason, eventMessage string) {
	By(fmt.Sprintf("Verifying that event %s was created", eventReason))
	ExpectWithOffset(1, isEventOccurred(eventType, eventReason, eventMessage)).To(BeTrue())
}

// verifyNoEvent fails the calling spec if an event with the given type,
// reason and message is currently queued in the fake recorder.
// The offset of 1 attributes a failure to the caller's line.
func verifyNoEvent(eventType, eventReason, eventMessage string) {
	By(fmt.Sprintf("Verifying that event %s was not created", eventReason))
	ExpectWithOffset(1, isEventOccurred(eventType, eventReason, eventMessage)).To(BeFalse())
}

// isEventOccurred reports whether the fake recorder currently holds an event
// equal to "<eventType> <eventReason> <eventMessage>".
//
// The recorder's channel is drained without blocking until either a match is
// found or no more events are immediately available. The matching event is
// consumed; every non-matching event read during the scan is sent back to the
// channel afterwards so later checks can still observe it. Events that were
// not read (those queued behind a match) keep their place, so re-queued events
// end up after them.
func isEventOccurred(eventType, eventReason, eventMessage string) bool {
	expected := fmt.Sprintf("%s %s %s", eventType, eventReason, eventMessage)

	// Park unmatched events in a slice rather than in a channel sized from a
	// one-off len() snapshot: if another event is recorded while we scan, a
	// fixed-size scratch channel would fill up and block the send forever.
	unmatched := make([]string, 0, len(fakeRecorder.Events))
	matched := false

scan:
	for {
		select {
		case event := <-fakeRecorder.Events:
			if event == expected {
				matched = true
				break scan
			}
			unmatched = append(unmatched, event)
		default:
			// Nothing is ready to be received right now.
			break scan
		}
	}

	// Return the events we looked at but did not match.
	for _, event := range unmatched {
		fakeRecorder.Events <- event
	}
	return matched
}

// clearEvents discards every event queued in the fake recorder's channel and
// then logs that the queue is empty.
func clearEvents() {
	for pending := len(fakeRecorder.Events); pending > 0; pending = len(fakeRecorder.Events) {
		<-fakeRecorder.Events
	}
	tesLog.Info("Cleanup: events list is empty")
}

type mockLeaseManager struct {
lease.Manager
}
Expand Down
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ func main() {
Client: cl,
Scheme: mgr.GetScheme(),
LeaseManager: leaseManagerInitializer,
Recorder: mgr.GetEventRecorderFor("NodeMaintenanceOperator"),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenance")
os.Exit(1)
Expand Down
49 changes: 49 additions & 0 deletions pkg/utils/events.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package utils

import (
"fmt"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
)

// Event reasons and messages passed to the event helpers in this package.
const (
// events reasons

// EventReasonBeginMaintenance is the reason for the event recorded when maintenance begins.
EventReasonBeginMaintenance = "BeginMaintenance"
// EventReasonEvictingPods is the reason for the event recorded while pods are being evicted.
EventReasonEvictingPods = "EvictingPods"
// EventReasonFailedMaintenance is the reason for the event recorded when maintenance has failed.
EventReasonFailedMaintenance = "FailedMaintenance"
// EventReasonSucceedMaintenance is the reason for the event recorded when maintenance has completed.
EventReasonSucceedMaintenance = "SucceedMaintenance"
// EventReasonUncordonNode is the reason for the event recorded when the node is uncordoned.
EventReasonUncordonNode = "UncordonNode"
// EventReasonRemovedMaintenance is the reason for the event recorded when maintenance is removed.
EventReasonRemovedMaintenance = "RemovedMaintenance"

// events messages

// EventMessageBeginMaintenance is the message paired with EventReasonBeginMaintenance.
EventMessageBeginMaintenance = "Begin maintenance"
// EventMessageEvictingPods is the message paired with EventReasonEvictingPods.
EventMessageEvictingPods = "Evicting pods"
// EventMessageFailedMaintenance is the message paired with EventReasonFailedMaintenance.
EventMessageFailedMaintenance = "Failed maintenance"
// EventMessageSucceedMaintenance is the message paired with EventReasonSucceedMaintenance.
EventMessageSucceedMaintenance = "Node maintenance was succeed"
// EventMessageUncordonNode is the message paired with EventReasonUncordonNode.
EventMessageUncordonNode = "Uncordon node"
// EventMessageRemovedMaintenance is the message paired with EventReasonRemovedMaintenance.
EventMessageRemovedMaintenance = "Removed maintenance"
)

// NormalEvent records an event of type Normal on object, using the given
// reason and a fixed message.
func NormalEvent(recorder record.EventRecorder, object runtime.Object, reason, message string) {
	const eventType = corev1.EventTypeNormal
	recorder.Event(object, eventType, reason, message)
}

// NormalEventf records an event of type Normal on object. The message is
// built by formatting messageFmt with the arguments a, fmt.Sprintf style.
func NormalEventf(recorder record.EventRecorder, object runtime.Object, reason, messageFmt string, a ...interface{}) {
	recorder.Event(object, corev1.EventTypeNormal, reason, fmt.Sprintf(messageFmt, a...))
}

// WarningEvent records an event of type Warning on object, using the given
// reason and a fixed message.
func WarningEvent(recorder record.EventRecorder, object runtime.Object, reason, message string) {
	const eventType = corev1.EventTypeWarning
	recorder.Event(object, eventType, reason, message)
}

// WarningEventf records an event of type Warning on object. The message is
// built by formatting messageFmt with the arguments a, fmt.Sprintf style.
func WarningEventf(recorder record.EventRecorder, object runtime.Object, reason, messageFmt string, a ...interface{}) {
	recorder.Event(object, corev1.EventTypeWarning, reason, fmt.Sprintf(messageFmt, a...))
}

0 comments on commit add8cb5

Please sign in to comment.