From e2b637c44a20859d609c51230c761be34d2a115a Mon Sep 17 00:00:00 2001 From: Yevgeny Shnaidman Date: Tue, 10 Oct 2023 16:17:22 +0300 Subject: [PATCH] Fixing 2 bugs related to failure scenarios First bug: If during module deletion scenario, UnloadKmod function returns and error for some reason, the GarbageCollectInUseLabels and UpdateNodeLabelsAndRecordEvents onw't be called. This in turn means, that if there is a module that was being loaded/unloaded in parallel, the actual action will happen (load/unload from kernel), but the label in NMC and node won't be updated. Solution: always continue with GarbageCollectInUseLabels and UpdateNodeLabelsAndRecordEvents and return a collective error later Second bug: In case ProcessModuleSpec from some spec/status has failed, the status would not be deleted from the statusMap, which will cause it later to be treated as orphaned status and the UnloadKmod to be called. Solution: always delete status from statusMap, even in case of a failure --- internal/controllers/nmc_reconciler.go | 14 ++--- internal/controllers/nmc_reconciler_test.go | 59 +++++++++++++++++++++ 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/internal/controllers/nmc_reconciler.go b/internal/controllers/nmc_reconciler.go index 46d452ea3..69d249819 100644 --- a/internal/controllers/nmc_reconciler.go +++ b/internal/controllers/nmc_reconciler.go @@ -125,10 +125,10 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r errs, fmt.Errorf("error processing Module %s: %v", moduleNameKey, err), ) - - continue } + // deleting status always (even in case of an error), so that it won't be treated + // as an orphaned status later in reconciliation delete(statusMap, moduleNameKey) } @@ -146,19 +146,15 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r } } - if err := errors.Join(errs...); err != nil { - return reconcile.Result{}, fmt.Errorf("could not process orphan statuses: %v", err) - } - if err := r.helper.GarbageCollectInUseLabels(ctx, &nmcObj); err != nil { - return reconcile.Result{}, fmt.Errorf("could not garbage collect in-use labels for NMC %s: %v", req.NamespacedName, err) + errs = append(errs, fmt.Errorf("failed to GC in-use labels for NMC %s: %v", req.NamespacedName, err)) } if err := r.helper.UpdateNodeLabelsAndRecordEvents(ctx, &nmcObj); err != nil { - return reconcile.Result{}, fmt.Errorf("could not update node's labels for NMC %s: %v", req.NamespacedName, err) + errs = append(errs, fmt.Errorf("could not update node's labels for NMC %s: %v", req.NamespacedName, err)) } - return ctrl.Result{}, nil + return ctrl.Result{}, errors.Join(errs...) } func (r *NMCReconciler) SetupWithManager(ctx context.Context, mgr manager.Manager) error { diff --git a/internal/controllers/nmc_reconciler_test.go b/internal/controllers/nmc_reconciler_test.go index 90cb0750e..c30de4af3 100644 --- a/internal/controllers/nmc_reconciler_test.go +++ b/internal/controllers/nmc_reconciler_test.go @@ -162,6 +162,65 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() { BeZero(), ) }) + + It("should complete all the reconcile functions and return combined error", func() { + const ( + mod0Name = "mod0" + mod1Name = "mod1" + mod2Name = "mod2" + ) + spec0 := kmmv1beta1.NodeModuleSpec{ + ModuleItem: kmmv1beta1.ModuleItem{ + Namespace: namespace, + Name: mod0Name, + }, + } + + status0 := kmmv1beta1.NodeModuleStatus{ + ModuleItem: kmmv1beta1.ModuleItem{ + Namespace: namespace, + Name: mod0Name, + }, + } + + status2 := kmmv1beta1.NodeModuleStatus{ + ModuleItem: kmmv1beta1.ModuleItem{ + Namespace: namespace, + Name: mod2Name, + }, + } + + nmc := &kmmv1beta1.NodeModulesConfig{ + ObjectMeta: metav1.ObjectMeta{Name: nmcName}, + Spec: kmmv1beta1.NodeModulesConfigSpec{ + Modules: []kmmv1beta1.NodeModuleSpec{spec0}, + }, + Status: kmmv1beta1.NodeModulesConfigStatus{ + Modules: []kmmv1beta1.NodeModuleStatus{status0, status2}, + }, + } + + contextWithValueMatch := gomock.AssignableToTypeOf( + reflect.TypeOf((*context.Context)(nil)).Elem(), + ) + + gomock.InOrder( + kubeClient. + EXPECT(). + Get(ctx, nmcNsn, &kmmv1beta1.NodeModulesConfig{}). + Do(func(_ context.Context, _ types.NamespacedName, kubeNmc ctrlclient.Object, _ ...ctrlclient.Options) { + *kubeNmc.(*kmmv1beta1.NodeModulesConfig) = *nmc + }), + wh.EXPECT().SyncStatus(ctx, nmc).Return(nil), + wh.EXPECT().ProcessModuleSpec(contextWithValueMatch, nmc, &spec0, &status0).Return(fmt.Errorf("some error")), + wh.EXPECT().ProcessUnconfiguredModuleStatus(contextWithValueMatch, nmc, &status2).Return(fmt.Errorf("some error")), + wh.EXPECT().GarbageCollectInUseLabels(ctx, nmc).Return(fmt.Errorf("some error")), + wh.EXPECT().UpdateNodeLabelsAndRecordEvents(ctx, nmc).Return(fmt.Errorf("some error")), + ) + + _, err := r.Reconcile(ctx, req) + Expect(err).ToNot(BeNil()) + }) }) var moduleConfig = kmmv1beta1.ModuleConfig{