From 3596563341392307852f716641be375763a4d27b Mon Sep 17 00:00:00 2001 From: TomerNewman Date: Sun, 17 Nov 2024 12:32:11 +0200 Subject: [PATCH] Removing labels when node not ready Until now nodes kept their kmod labels when became not ready. Today we are deleting them when they are not ready due to potential race condition. --- internal/controllers/nmc_reconciler.go | 6 +- internal/controllers/nmc_reconciler_test.go | 94 ++++++++++++++++++--- internal/node/mock_node.go | 14 +++ internal/node/node.go | 16 ++++ internal/node/node_test.go | 46 +++++++++- 5 files changed, 161 insertions(+), 15 deletions(-) diff --git a/internal/controllers/nmc_reconciler.go b/internal/controllers/nmc_reconciler.go index 33d9f170a..58fe4628f 100644 --- a/internal/controllers/nmc_reconciler.go +++ b/internal/controllers/nmc_reconciler.go @@ -127,8 +127,12 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r return ctrl.Result{}, fmt.Errorf("could not get node %s: %v", nmcObj.Name, err) } - // skipping handling NMC spec, labelling, events until node becomes ready + // skipping handling NMC spec, events until node becomes ready + // removing label of loaded kmods if !r.nodeAPI.IsNodeSchedulable(&node) { + if err := r.nodeAPI.RemoveNodeReadyLabels(ctx, &node); err != nil { + return ctrl.Result{}, fmt.Errorf("could remove node %s labels: %v", node.Name, err) + } return ctrl.Result{}, nil } diff --git a/internal/controllers/nmc_reconciler_test.go b/internal/controllers/nmc_reconciler_test.go index f32dcfde7..39b3d2cdf 100644 --- a/internal/controllers/nmc_reconciler_test.go +++ b/internal/controllers/nmc_reconciler_test.go @@ -37,13 +37,15 @@ import ( ) const ( - nmcName = "nmc" - nsFirst = "example-ns-1" - nsSecond = "example-ns-2" - nameFirst = "example-name-1" - nameSecond = "example-name-2" - imageFirst = "example-image-1" - imageSecond = "example-image-2" + nmcName = "nmc" + nsFirst = "example-ns-1" + nsSecond = "example-ns-2" + nameFirst = "example-name-1" + nameSecond = "example-name-2" + imageFirst = "example-image-1" + imageSecond = "example-image-2" + kmodName = "kmm.node.kubernetes.io/test-ns.test-module.ready" + labelNotToRemove = "example.node.kubernetes.io/label-not-to-be-removed" ) var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() { @@ -126,11 +128,25 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() { Expect(err).To(HaveOccurred()) }) - It("should not continue if node is not schedulable", func() { + It("should remove kmod labels and not continue if node is not schedulable", func() { nmc := &kmmv1beta1.NodeModulesConfig{ ObjectMeta: metav1.ObjectMeta{Name: nmcName}, } - node := v1.Node{} + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + kmodName: "", + labelNotToRemove: "", + }, + }, + } + expectedNode := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + labelNotToRemove: "", + }, + }, + } gomock.InOrder( kubeClient. EXPECT(). @@ -139,12 +155,70 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() { *kubeNmc.(*kmmv1beta1.NodeModulesConfig) = *nmc }), wh.EXPECT().SyncStatus(ctx, nmc), - kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &node).Return(nil), + kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &v1.Node{}).DoAndReturn( + func(_ context.Context, _ types.NamespacedName, fetchedNode *v1.Node, _ ...ctrlclient.Options) error { + *fetchedNode = node + return nil + }, + ), nm.EXPECT().IsNodeSchedulable(&node).Return(false), + nm.EXPECT().RemoveNodeReadyLabels(ctx, &node).DoAndReturn( + func(_ context.Context, obj ctrlclient.Object) error { + delete(node.ObjectMeta.Labels, kmodName) + return nil + }, + ), ) _, err := r.Reconcile(ctx, req) Expect(err).To(BeNil()) + Expect(node).To(Equal(expectedNode)) + }) + + It("should fail to remove kmod labels and not continue if node is not schedulable", func() { + nmc := &kmmv1beta1.NodeModulesConfig{ + ObjectMeta: metav1.ObjectMeta{Name: nmcName}, + } + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + kmodName: "", + labelNotToRemove: "", + }, + }, + } + expectedNode := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + labelNotToRemove: "", + }, + }, + } + gomock.InOrder( + kubeClient. + EXPECT(). + Get(ctx, nmcNsn, &kmmv1beta1.NodeModulesConfig{}). + Do(func(_ context.Context, _ types.NamespacedName, kubeNmc ctrlclient.Object, _ ...ctrlclient.Options) { + *kubeNmc.(*kmmv1beta1.NodeModulesConfig) = *nmc + }), + wh.EXPECT().SyncStatus(ctx, nmc), + kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &v1.Node{}).DoAndReturn( + func(_ context.Context, _ types.NamespacedName, fetchedNode *v1.Node, _ ...ctrlclient.Options) error { + *fetchedNode = node + return nil + }, + ), + nm.EXPECT().IsNodeSchedulable(&node).Return(false), + nm.EXPECT().RemoveNodeReadyLabels(ctx, &node).DoAndReturn( + func(_ context.Context, obj ctrlclient.Object) error { + return fmt.Errorf("some error") + }, + ), + ) + + _, err := r.Reconcile(ctx, req) + Expect(err).To(HaveOccurred()) + Expect(node).ToNot(Equal(expectedNode)) }) It("should process spec entries and orphan statuses", func() { diff --git a/internal/node/mock_node.go b/internal/node/mock_node.go index 0d0556d40..bfce1de06 100644 --- a/internal/node/mock_node.go +++ b/internal/node/mock_node.go @@ -98,6 +98,20 @@ func (mr *MockNodeMockRecorder) NodeBecomeReadyAfter(node, checkTime any) *gomoc return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NodeBecomeReadyAfter", reflect.TypeOf((*MockNode)(nil).NodeBecomeReadyAfter), node, checkTime) } +// RemoveNodeReadyLabels mocks base method. +func (m *MockNode) RemoveNodeReadyLabels(ctx context.Context, node *v1.Node) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RemoveNodeReadyLabels", ctx, node) + ret0, _ := ret[0].(error) + return ret0 +} + +// RemoveNodeReadyLabels indicates an expected call of RemoveNodeReadyLabels. +func (mr *MockNodeMockRecorder) RemoveNodeReadyLabels(ctx, node any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveNodeReadyLabels", reflect.TypeOf((*MockNode)(nil).RemoveNodeReadyLabels), ctx, node) +} + // UpdateLabels mocks base method. func (m *MockNode) UpdateLabels(ctx context.Context, node *v1.Node, toBeAdded, toBeRemoved []string) error { m.ctrl.T.Helper() diff --git a/internal/node/node.go b/internal/node/node.go index 47b2c1c44..91f4fc853 100644 --- a/internal/node/node.go +++ b/internal/node/node.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "github.com/kubernetes-sigs/kernel-module-management/internal/meta" + "github.com/kubernetes-sigs/kernel-module-management/internal/utils" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -18,6 +19,7 @@ type Node interface { GetNumTargetedNodes(ctx context.Context, selector map[string]string) (int, error) UpdateLabels(ctx context.Context, node *v1.Node, toBeAdded, toBeRemoved []string) error NodeBecomeReadyAfter(node *v1.Node, checkTime metav1.Time) bool + RemoveNodeReadyLabels(ctx context.Context, node *v1.Node) error } type node struct { @@ -78,6 +80,20 @@ func (n *node) UpdateLabels(ctx context.Context, node *v1.Node, toBeAdded, toBeR return nil } +func (n *node) RemoveNodeReadyLabels(ctx context.Context, node *v1.Node) error { + var labelsToRemove []string + for label := range node.GetLabels() { + if ok, _, _ := utils.IsKernelModuleReadyNodeLabel(label); ok || + utils.IsDeprecatedKernelModuleReadyNodeLabel(label) { + labelsToRemove = append(labelsToRemove, label) + } + } + if err := n.UpdateLabels(ctx, node, []string{}, labelsToRemove); err != nil { + return fmt.Errorf("could update node %s labels: %v", node.Name, err) + } + return nil +} + func (n *node) NodeBecomeReadyAfter(node *v1.Node, timestamp metav1.Time) bool { conds := node.Status.Conditions for i := 0; i < len(conds); i++ { diff --git a/internal/node/node_test.go b/internal/node/node_test.go index 876bb48de..7909d58a6 100644 --- a/internal/node/node_test.go +++ b/internal/node/node_test.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "github.com/kubernetes-sigs/kernel-module-management/internal/client" - "github.com/kubernetes-sigs/kernel-module-management/internal/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "go.uber.org/mock/gomock" @@ -121,9 +120,10 @@ var _ = Describe("GetNodesListBySelector", func() { }) }) -var ( - loadedKernelModuleReadyNodeLabel = utils.GetKernelModuleReadyNodeLabel("loaded-ns", "loaded-n") - unloadedKernelModuleReadyNodeLabel = utils.GetKernelModuleReadyNodeLabel("unloaded-ns", "unloaded-n") +const ( + loadedKernelModuleReadyNodeLabel = "kmm.node.kubernetes.io/loaded-ns.loaded-n.ready" + unloadedKernelModuleReadyNodeLabel = "kmm.node.kubernetes.io/unloaded-ns.unloaded-n.ready" + notKernelModuleReadyNodeLabel = "example.node.kubernetes.io/label-not-to-be-removed" ) var _ = Describe("UpdateLabels", func() { @@ -279,6 +279,44 @@ var _ = Describe("NodeBecomeReadyAfter", func() { }) }) +var _ = Describe("RemoveNodeReadyLabels", func() { + var ( + ctrl *gomock.Controller + n Node + node *v1.Node + ctx context.Context + clnt *client.MockClient + ) + + BeforeEach(func() { + ctrl = gomock.NewController(GinkgoT()) + clnt = client.NewMockClient(ctrl) + ctx = context.TODO() + n = NewNode(clnt) + node = &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + loadedKernelModuleReadyNodeLabel: "", + notKernelModuleReadyNodeLabel: "", + }, + }, + } + }) + + It("Should remove all kmod labels", func() { + clnt.EXPECT().Patch(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + err := n.RemoveNodeReadyLabels(ctx, node) + Expect(err).To(BeNil()) + Expect(node.Labels).ToNot(HaveKey(loadedKernelModuleReadyNodeLabel)) + Expect(node.Labels).To(HaveKey(notKernelModuleReadyNodeLabel)) + }) + It("Should fail", func() { + clnt.EXPECT().Patch(gomock.Any(), gomock.Any(), gomock.Any()).Return(fmt.Errorf("some error")) + err := n.RemoveNodeReadyLabels(ctx, node) + Expect(err).ToNot(BeNil()) + }) +}) + var _ = Describe("addLabels", func() { var node v1.Node