From 679d205149676df92bebd6cf43ab6b905a744ab6 Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Tue, 10 Sep 2024 11:41:13 +0200 Subject: [PATCH 1/5] Implement etcd member management in pre-terminate hook Signed-off-by: Danil-Grigorev --- .../api/v1beta1/rke2controlplane_types.go | 6 ++ .../rke2controlplane_controller.go | 98 +++++++++++++++++++ controlplane/internal/controllers/scale.go | 36 +++++-- pkg/rke2/control_plane.go | 41 ++++++++ .../clusterclass-template-docker.yaml | 1 - 5 files changed, 175 insertions(+), 7 deletions(-) diff --git a/controlplane/api/v1beta1/rke2controlplane_types.go b/controlplane/api/v1beta1/rke2controlplane_types.go index b853d326..d29c6b70 100644 --- a/controlplane/api/v1beta1/rke2controlplane_types.go +++ b/controlplane/api/v1beta1/rke2controlplane_types.go @@ -417,6 +417,12 @@ const ( // RollingUpdateStrategyType replaces the old control planes by new one using rolling update // i.e. gradually scale up or down the old control planes and scale up or down the new one. RollingUpdateStrategyType RolloutStrategyType = "RollingUpdate" + + // PreTerminateHookCleanupAnnotation is the annotation RKE2 sets on Machines to ensure it can later remove the + // etcd member right before Machine termination (i.e. before InfraMachine deletion). + // Note: Starting with Kubernetes v1.31 this hook will wait for all other pre-terminate hooks to finish to + // ensure it runs last (thus ensuring that kubelet is still working while other pre-terminate hooks run). 
+ PreTerminateHookCleanupAnnotation = clusterv1.PreTerminateDeleteHookAnnotationPrefix + "/rke2-cleanup" ) func init() { //nolint:gochecknoinits diff --git a/controlplane/internal/controllers/rke2controlplane_controller.go b/controlplane/internal/controllers/rke2controlplane_controller.go index 528657a7..d471ed4f 100644 --- a/controlplane/internal/controllers/rke2controlplane_controller.go +++ b/controlplane/internal/controllers/rke2controlplane_controller.go @@ -19,6 +19,7 @@ package controllers import ( "context" "fmt" + "strings" "time" "github.com/blang/semver/v4" @@ -32,6 +33,7 @@ import ( kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -47,6 +49,7 @@ import ( "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/cluster-api/util/version" controlplanev1 "github.com/rancher/cluster-api-provider-rke2/controlplane/api/v1beta1" "github.com/rancher/cluster-api-provider-rke2/pkg/kubeconfig" @@ -514,6 +517,10 @@ func (r *RKE2ControlPlaneReconciler) reconcileNormal( return ctrl.Result{}, err } + if result, err := r.reconcilePreTerminateHook(ctx, controlPlane); err != nil || !result.IsZero() { + return result, err + } + // Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations. 
needRollout := controlPlane.MachinesNeedingRollout() @@ -909,6 +916,97 @@ func (r *RKE2ControlPlaneReconciler) ClusterToRKE2ControlPlane(ctx context.Conte } } +func (r *RKE2ControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Context, controlPlane *rke2.ControlPlane) (ctrl.Result, error) { + if !controlPlane.HasDeletingMachine() { + return ctrl.Result{}, nil + } + + log := ctrl.LoggerFrom(ctx) + + // Return early, if there is already a deleting Machine without the pre-terminate hook. + // We are going to wait until this Machine goes away before running the pre-terminate hook on other Machines. + for _, deletingMachine := range controlPlane.DeletingMachines() { + if _, exists := deletingMachine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists { + return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil + } + } + + // Pick the Machine with the oldest deletionTimestamp to keep this function deterministic / reentrant + // so we only remove the pre-terminate hook from one Machine at a time. + deletingMachines := controlPlane.DeletingMachines() + deletingMachine := controlPlane.SortedByDeletionTimestamp(deletingMachines)[0] + + log = log.WithValues("Machine", klog.KObj(deletingMachine)) + ctx = ctrl.LoggerInto(ctx, log) + + parsedVersion, err := semver.ParseTolerant(controlPlane.RCP.Spec.Version) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to parse Kubernetes version %q", controlPlane.RCP.Spec.Version) + } + + // Return early if there are other pre-terminate hooks for the Machine. + // The KCP pre-terminate hook should be the one executed last, so that kubelet + // is still working while other pre-terminate hooks are run. + // Note: This is done only for Kubernetes >= v1.31 to reduce the blast radius of this check. 
+ if version.Compare(parsedVersion, semver.MustParse("1.31.0"), version.WithoutPreReleases()) >= 0 { + if machineHasOtherPreTerminateHooks(deletingMachine) { + return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil + } + } + + // Return early because the Machine controller is not yet waiting for the pre-terminate hook. + c := conditions.Get(deletingMachine, clusterv1.PreTerminateDeleteHookSucceededCondition) + if c == nil || c.Status != corev1.ConditionFalse || c.Reason != clusterv1.WaitingExternalHookReason { + return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil + } + + // The following will execute and remove the pre-terminate hook from the Machine. + + // If we have more than 1 Machine and etcd is managed we forward etcd leadership and remove the member + // to keep the etcd cluster healthy. + if controlPlane.Machines.Len() > 1 { + workloadCluster, err := r.GetWorkloadCluster(ctx, controlPlane) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster", klog.KObj(deletingMachine)) + } + + // Note: In regular deletion cases (remediation, scale down) the leader should have been already moved. + // We're doing this again here in case the Machine became leader again or the Machine deletion was + // triggered in another way (e.g. 
a user running kubectl delete machine) + etcdLeaderCandidate := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)).Newest() + if etcdLeaderCandidate != nil { + if err := workloadCluster.ForwardEtcdLeadership(ctx, deletingMachine, etcdLeaderCandidate); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to move leadership to candidate Machine %s", etcdLeaderCandidate.Name) + } + } else { + log.Info("Skip forwarding etcd leadership, because there is no other control plane Machine without a deletionTimestamp") + } + + // Note: Removing the etcd member will lead to the etcd and the kube-apiserver Pod on the Machine shutting down. + // If ControlPlaneKubeletLocalMode is used, the kubelet is communicating with the local apiserver and thus now + // won't be able to see any updates to e.g. Pods anymore. + if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, deletingMachine); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s", klog.KObj(deletingMachine)) + } + } + + if err := r.removePreTerminateHookAnnotationFromMachine(ctx, deletingMachine); err != nil { + return ctrl.Result{}, err + } + + log.Info("Waiting for Machines to be deleted", "machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", ")) + return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil +} + +func machineHasOtherPreTerminateHooks(machine *clusterv1.Machine) bool { + for k := range machine.Annotations { + if strings.HasPrefix(k, clusterv1.PreTerminateDeleteHookAnnotationPrefix) && k != controlplanev1.PreTerminateHookCleanupAnnotation { + return true + } + } + return false +} + // getWorkloadCluster gets a cluster object. // The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine. 
func (r *RKE2ControlPlaneReconciler) getWorkloadCluster(ctx context.Context, clusterKey types.NamespacedName) (rke2.WorkloadCluster, error) { diff --git a/controlplane/internal/controllers/scale.go b/controlplane/internal/controllers/scale.go index b836573e..35e6cb27 100644 --- a/controlplane/internal/controllers/scale.go +++ b/controlplane/internal/controllers/scale.go @@ -29,7 +29,9 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apiserver/pkg/storage/names" + "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -151,6 +153,17 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane( return ctrl.Result{}, errors.New("failed to pick control plane Machine to delete") } + // During RKE2 deletion we don't care about forwarding etcd leadership or removing etcd members. + // So we are removing the pre-terminate hook. + // This is important because when deleting RKE2 we will delete all members of etcd and it's not possible + // to forward etcd leadership without any member left after we went through the Machine deletion. + // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain + // and wait for volume detach. + if err := r.removePreTerminateHookAnnotationFromMachine(ctx, machineToDelete); err != nil { + + return ctrl.Result{}, err + } + // If etcd leadership is on machine that is about to be deleted, move it to the newest member available. 
etcdLeaderCandidate := controlPlane.Machines.Newest() if err := r.workloadCluster.ForwardEtcdLeadership(ctx, machineToDelete, etcdLeaderCandidate); err != nil { @@ -159,11 +172,7 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane( return ctrl.Result{}, err } - if err := r.workloadCluster.RemoveEtcdMemberForMachine(ctx, machineToDelete); err != nil { - logger.Error(err, "Failed to remove etcd member for machine") - - return ctrl.Result{}, err - } + // NOTE: etcd member removal will be performed by the kcp-cleanup hook after machine completes drain & all volumes are detached. logger = logger.WithValues("machine", machineToDelete) if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) { @@ -178,6 +187,18 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane( return ctrl.Result{Requeue: true}, nil } +func (r *RKE2ControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error { + log := ctrl.LoggerFrom(ctx) + log.Info("Removing pre-terminate hook from control plane Machine") + + machineOriginal := machine.DeepCopy() + delete(machine.Annotations, controlplanev1.PreTerminateHookCleanupAnnotation) + if err := r.Client.Patch(ctx, machine, client.MergeFrom(machineOriginal)); err != nil { + return errors.Wrapf(err, "failed to remove pre-terminate hook from control plane Machine %s", klog.KObj(machine)) + } + return nil +} + // preflightChecks checks if the control plane is stable before proceeding with a scale up/scale down operation, // where stable means that: // - There are no machine deletion in progress @@ -447,7 +468,10 @@ func (r *RKE2ControlPlaneReconciler) generateMachine( return errors.Wrap(err, "failed to marshal cluster configuration") } - machine.SetAnnotations(map[string]string{controlplanev1.RKE2ServerConfigurationAnnotation: string(serverConfig)}) + machine.SetAnnotations(map[string]string{ + controlplanev1.RKE2ServerConfigurationAnnotation: 
string(serverConfig), + controlplanev1.PreTerminateHookCleanupAnnotation: "", + }) if err := r.Client.Create(ctx, machine); err != nil { return errors.Wrap(err, "failed to create machine") diff --git a/pkg/rke2/control_plane.go b/pkg/rke2/control_plane.go index 00973b28..1e5556e7 100644 --- a/pkg/rke2/control_plane.go +++ b/pkg/rke2/control_plane.go @@ -18,6 +18,7 @@ package rke2 import ( "context" + "sort" "github.com/go-logr/logr" "github.com/pkg/errors" @@ -268,6 +269,21 @@ func (c *ControlPlane) HasDeletingMachine() bool { return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0 } +// DeletingMachines returns machines in the control plane that are in the process of being deleted. +func (c *ControlPlane) DeletingMachines() collections.Machines { + return c.Machines.Filter(collections.HasDeletionTimestamp) +} + +// SortedByDeletionTimestamp returns the machines sorted by deletion timestamp. +func (c *ControlPlane) SortedByDeletionTimestamp(s collections.Machines) []*clusterv1.Machine { + res := make(machinesByDeletionTimestamp, 0, len(s)) + for _, value := range s { + res = append(res, value) + } + sort.Sort(res) + return res +} + // MachinesNeedingRollout return a list of machines that need to be rolled out. func (c *ControlPlane) MachinesNeedingRollout() collections.Machines { // Ignore machines to be deleted. @@ -383,3 +399,28 @@ func (c *ControlPlane) PatchMachines(ctx context.Context) error { return kerrors.NewAggregate(errList) } + +// machinesByDeletionTimestamp sorts a list of Machines by deletion timestamp, using their names as a tie breaker. +// Machines without DeletionTimestamp go after machines with this field set. 
+type machinesByDeletionTimestamp []*clusterv1.Machine + +func (o machinesByDeletionTimestamp) Len() int { return len(o) } +func (o machinesByDeletionTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] } +func (o machinesByDeletionTimestamp) Less(i, j int) bool { + if o[i].DeletionTimestamp == nil && o[j].DeletionTimestamp == nil { + return o[i].Name < o[j].Name + } + + if o[i].DeletionTimestamp == nil { + return false + } + + if o[j].DeletionTimestamp == nil { + return true + } + + if o[i].DeletionTimestamp.Equal(o[j].DeletionTimestamp) { + return o[i].Name < o[j].Name + } + return o[i].DeletionTimestamp.Before(o[j].DeletionTimestamp) +} diff --git a/test/e2e/data/infrastructure/clusterclass-template-docker.yaml b/test/e2e/data/infrastructure/clusterclass-template-docker.yaml index 66079e04..96e11eca 100644 --- a/test/e2e/data/infrastructure/clusterclass-template-docker.yaml +++ b/test/e2e/data/infrastructure/clusterclass-template-docker.yaml @@ -161,7 +161,6 @@ spec: cni: calico disableComponents: kubernetesComponents: [ "cloudController"] - nodeDrainTimeout: 2m rolloutStrategy: type: "RollingUpdate" rollingUpdate: From eb09be3882e0330f837bdbfca07593557ba533cf Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Tue, 10 Sep 2024 15:15:38 +0200 Subject: [PATCH 2/5] Prevent removing non-drained machine for rke2 case Signed-off-by: Danil-Grigorev --- controlplane/internal/controllers/scale.go | 27 ++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/controlplane/internal/controllers/scale.go b/controlplane/internal/controllers/scale.go index 35e6cb27..d95bb7d3 100644 --- a/controlplane/internal/controllers/scale.go +++ b/controlplane/internal/controllers/scale.go @@ -153,26 +153,29 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane( return ctrl.Result{}, errors.New("failed to pick control plane Machine to delete") } - // During RKE2 deletion we don't care about forwarding etcd leadership or removing etcd members. 
- // So we are removing the pre-terminate hook. - // This is important because when deleting RKE2 we will delete all members of etcd and it's not possible - // to forward etcd leadership without any member left after we went through the Machine deletion. - // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain - // and wait for volume detach. - if err := r.removePreTerminateHookAnnotationFromMachine(ctx, machineToDelete); err != nil { - - return ctrl.Result{}, err - } - // If etcd leadership is on machine that is about to be deleted, move it to the newest member available. etcdLeaderCandidate := controlPlane.Machines.Newest() + // Removing last memember of CP machines + if etcdLeaderCandidate == nil || !etcdLeaderCandidate.DeletionTimestamp.IsZero() { + // During complete RKE2 deletion we don't care about forwarding etcd leadership or removing etcd members. + // So we are removing the pre-terminate hook. + // This is important because when deleting RKE2 we will delete all members of etcd and it's not possible + // to forward etcd leadership without any member left after we went through the Machine deletion. + // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain + // and wait for volume detach. + if err := r.removePreTerminateHookAnnotationFromMachine(ctx, machineToDelete); err != nil { + + return ctrl.Result{}, err + } + } + if err := r.workloadCluster.ForwardEtcdLeadership(ctx, machineToDelete, etcdLeaderCandidate); err != nil { logger.Error(err, "Failed to move leadership to candidate machine", "candidate", etcdLeaderCandidate.Name) return ctrl.Result{}, err } - // NOTE: etcd member removal will be performed by the kcp-cleanup hook after machine completes drain & all volumes are detached. + // NOTE: etcd member removal will be performed by the rke2-cleanup hook after machine completes drain & all volumes are detached. 
logger = logger.WithValues("machine", machineToDelete) if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) { From 931e7cc55a2e4bfbe03344eb3260aa16f33ff7a3 Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Tue, 10 Sep 2024 15:51:02 +0200 Subject: [PATCH 3/5] Propagate PreTerminateHookCleanupAnnotation on old machines - Lint fixes Signed-off-by: Danil-Grigorev --- .../rke2controlplane_controller.go | 25 ++++++++++++++++--- controlplane/internal/controllers/scale.go | 3 ++- pkg/rke2/control_plane.go | 3 +++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/controlplane/internal/controllers/rke2controlplane_controller.go b/controlplane/internal/controllers/rke2controlplane_controller.go index d471ed4f..c4ee10d7 100644 --- a/controlplane/internal/controllers/rke2controlplane_controller.go +++ b/controlplane/internal/controllers/rke2controlplane_controller.go @@ -550,6 +550,21 @@ func (r *RKE2ControlPlaneReconciler) reconcileNormal( desiredReplicas := int(*rcp.Spec.Replicas) switch { + case numMachines == desiredReplicas: + nonDeleteingMachines := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)) + for _, machine := range nonDeleteingMachines { + annotaions := machine.GetAnnotations() + + if _, found := annotaions[controlplanev1.PreTerminateHookCleanupAnnotation]; !found { + annotaions[controlplanev1.PreTerminateHookCleanupAnnotation] = "" + machine.SetAnnotations(annotaions) + } + } + + // Patch machine annoations + if err := controlPlane.PatchMachines(ctx); err != nil { + return ctrl.Result{}, err + } // We are creating the first replica case numMachines < desiredReplicas && numMachines == 0: // Create new Machine w/ init @@ -945,7 +960,7 @@ func (r *RKE2ControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Conte } // Return early if there are other pre-terminate hooks for the Machine. 
- // The KCP pre-terminate hook should be the one executed last, so that kubelet + // The CAPRKE2 pre-terminate hook should be the one executed last, so that kubelet // is still working while other pre-terminate hooks are run. // Note: This is done only for Kubernetes >= v1.31 to reduce the blast radius of this check. if version.Compare(parsedVersion, semver.MustParse("1.31.0"), version.WithoutPreReleases()) >= 0 { @@ -967,7 +982,8 @@ func (r *RKE2ControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Conte if controlPlane.Machines.Len() > 1 { workloadCluster, err := r.GetWorkloadCluster(ctx, controlPlane) if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster", klog.KObj(deletingMachine)) + return ctrl.Result{}, errors.Wrapf(err, + "failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster", klog.KObj(deletingMachine)) } // Note: In regular deletion cases (remediation, scale down) the leader should have been already moved. 
@@ -994,7 +1010,9 @@ func (r *RKE2ControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Conte return ctrl.Result{}, err } - log.Info("Waiting for Machines to be deleted", "machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", ")) + log.Info("Waiting for Machines to be deleted", "machines", + strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", ")) + return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil } @@ -1004,6 +1022,7 @@ func machineHasOtherPreTerminateHooks(machine *clusterv1.Machine) bool { return true } } + return false } diff --git a/controlplane/internal/controllers/scale.go b/controlplane/internal/controllers/scale.go index d95bb7d3..ba2c4dc9 100644 --- a/controlplane/internal/controllers/scale.go +++ b/controlplane/internal/controllers/scale.go @@ -164,7 +164,6 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane( // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain // and wait for volume detach. 
 if err := r.removePreTerminateHookAnnotationFromMachine(ctx, machineToDelete); err != nil { - return ctrl.Result{}, err } } @@ -196,9 +195,11 @@ func (r *RKE2ControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine machineOriginal := machine.DeepCopy() delete(machine.Annotations, controlplanev1.PreTerminateHookCleanupAnnotation) + if err := r.Client.Patch(ctx, machine, client.MergeFrom(machineOriginal)); err != nil { return errors.Wrapf(err, "failed to remove pre-terminate hook from control plane Machine %s", klog.KObj(machine)) } + return nil } diff --git a/pkg/rke2/control_plane.go b/pkg/rke2/control_plane.go index 1e5556e7..81af9b65 100644 --- a/pkg/rke2/control_plane.go +++ b/pkg/rke2/control_plane.go @@ -280,7 +280,9 @@ func (c *ControlPlane) SortedByDeletionTimestamp(s collections.Machines) []*clus for _, value := range s { res = append(res, value) } + sort.Sort(res) + return res } @@ -422,5 +424,6 @@ func (o machinesByDeletionTimestamp) Less(i, j int) bool { if o[i].DeletionTimestamp.Equal(o[j].DeletionTimestamp) { return o[i].Name < o[j].Name } + return o[i].DeletionTimestamp.Before(o[j].DeletionTimestamp) } From ed7e46764fd370aa58495b4e52b98ad49e9fc40f Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Wed, 11 Sep 2024 10:35:05 +0200 Subject: [PATCH 4/5] Ensure last machine removed does not wait for hook When the last machine is in a deleting state, this means that the cluster is also being removed. In such a scenario, waiting for draining is not feasible, because it is performed only when node deletion is allowed, which is not the case due to cluster removal. Cluster API prevents draining with the "cluster is being deleted" error. 
Signed-off-by: Danil-Grigorev --- .../rke2controlplane_controller.go | 21 ++++++++++++++++++- controlplane/internal/controllers/scale.go | 18 +++++----------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/controlplane/internal/controllers/rke2controlplane_controller.go b/controlplane/internal/controllers/rke2controlplane_controller.go index c4ee10d7..65755715 100644 --- a/controlplane/internal/controllers/rke2controlplane_controller.go +++ b/controlplane/internal/controllers/rke2controlplane_controller.go @@ -720,7 +720,7 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context, } // Delete control plane machines in parallel - machinesToDelete := ownedMachines.Filter(collections.Not(collections.HasDeletionTimestamp)) + machinesToDelete := ownedMachines var errs []error @@ -728,6 +728,23 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context, m := machinesToDelete[i] logger := logger.WithValues("machine", m) + // During RKE2CP deletion we don't care about forwarding etcd leadership or removing etcd members. + // So we are removing the pre-terminate hook. + // This is important because when deleting KCP we will delete all members of etcd and it's not possible + // to forward etcd leadership without any member left after we went through the Machine deletion. + // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain + // and wait for volume detach. + if err := r.removePreTerminateHookAnnotationFromMachine(ctx, m); err != nil { + errs = append(errs, err) + + continue + } + + if !m.DeletionTimestamp.IsZero() { + // Nothing to do, Machine already has deletionTimestamp set. 
+ continue + } + if err := r.Client.Delete(ctx, machinesToDelete[i]); err != nil && !apierrors.IsNotFound(err) { logger.Error(err, "Failed to cleanup owned machine") errs = append(errs, err) @@ -742,6 +759,8 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context, return ctrl.Result{}, err } + logger.Info("Waiting for control plane Machines to not exist anymore") + conditions.MarkFalse(rcp, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil diff --git a/controlplane/internal/controllers/scale.go b/controlplane/internal/controllers/scale.go index ba2c4dc9..d4eaff05 100644 --- a/controlplane/internal/controllers/scale.go +++ b/controlplane/internal/controllers/scale.go @@ -155,19 +155,6 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane( // If etcd leadership is on machine that is about to be deleted, move it to the newest member available. etcdLeaderCandidate := controlPlane.Machines.Newest() - // Removing last memember of CP machines - if etcdLeaderCandidate == nil || !etcdLeaderCandidate.DeletionTimestamp.IsZero() { - // During complete RKE2 deletion we don't care about forwarding etcd leadership or removing etcd members. - // So we are removing the pre-terminate hook. - // This is important because when deleting RKE2 we will delete all members of etcd and it's not possible - // to forward etcd leadership without any member left after we went through the Machine deletion. - // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain - // and wait for volume detach. 
- if err := r.removePreTerminateHookAnnotationFromMachine(ctx, machineToDelete); err != nil { - return ctrl.Result{}, err - } - } - if err := r.workloadCluster.ForwardEtcdLeadership(ctx, machineToDelete, etcdLeaderCandidate); err != nil { logger.Error(err, "Failed to move leadership to candidate machine", "candidate", etcdLeaderCandidate.Name) @@ -190,6 +177,11 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane( } func (r *RKE2ControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error { + if _, exists := machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists { + // Nothing to do, the annotation is not set (anymore) on the Machine + return nil + } + log := ctrl.LoggerFrom(ctx) log.Info("Removing pre-terminate hook from control plane Machine") From fc6f21d8131800bc1a05875011209ae0794d58c2 Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Thu, 12 Sep 2024 09:21:57 +0200 Subject: [PATCH 5/5] Ensure running machines always contain hook annotation Signed-off-by: Danil-Grigorev --- .../rke2controlplane_controller.go | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/controlplane/internal/controllers/rke2controlplane_controller.go b/controlplane/internal/controllers/rke2controlplane_controller.go index 65755715..6de906fc 100644 --- a/controlplane/internal/controllers/rke2controlplane_controller.go +++ b/controlplane/internal/controllers/rke2controlplane_controller.go @@ -550,21 +550,6 @@ func (r *RKE2ControlPlaneReconciler) reconcileNormal( desiredReplicas := int(*rcp.Spec.Replicas) switch { - case numMachines == desiredReplicas: - nonDeleteingMachines := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)) - for _, machine := range nonDeleteingMachines { - annotaions := machine.GetAnnotations() - - if _, found := annotaions[controlplanev1.PreTerminateHookCleanupAnnotation]; !found { - 
annotaions[controlplanev1.PreTerminateHookCleanupAnnotation] = "" - machine.SetAnnotations(annotaions) - } - } - - // Patch machine annoations - if err := controlPlane.PatchMachines(ctx); err != nil { - return ctrl.Result{}, err - } // We are creating the first replica case numMachines < desiredReplicas && numMachines == 0: // Create new Machine w/ init @@ -730,7 +715,7 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context, // During RKE2CP deletion we don't care about forwarding etcd leadership or removing etcd members. // So we are removing the pre-terminate hook. - // This is important because when deleting KCP we will delete all members of etcd and it's not possible + // This is important because when deleting RKE2CP we will delete all members of etcd and it's not possible // to forward etcd leadership without any member left after we went through the Machine deletion. // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain // and wait for volume detach. 
@@ -951,6 +936,23 @@ func (r *RKE2ControlPlaneReconciler) ClusterToRKE2ControlPlane(ctx context.Conte } func (r *RKE2ControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Context, controlPlane *rke2.ControlPlane) (ctrl.Result, error) { + // Ensure that every active machine has the drain hook set + patchHookAnnotation := false + + for _, machine := range controlPlane.Machines.Filter(collections.ActiveMachines) { + if _, exists := machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists { + machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation] = "" + patchHookAnnotation = true + } + } + + if patchHookAnnotation { + // Patch machine annotations + if err := controlPlane.PatchMachines(ctx); err != nil { + return ctrl.Result{}, err + } + } + if !controlPlane.HasDeletingMachine() { return ctrl.Result{}, nil } @@ -1018,8 +1020,6 @@ func (r *RKE2ControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Conte } // Note: Removing the etcd member will lead to the etcd and the kube-apiserver Pod on the Machine shutting down. - // If ControlPlaneKubeletLocalMode is used, the kubelet is communicating with the local apiserver and thus now - // won't be able to see any updates to e.g. Pods anymore. if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, deletingMachine); err != nil { return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s", klog.KObj(deletingMachine)) }