Skip to content

Commit

Permalink
Fixing 2 bugs related to failure scenarios (#599)
Browse files Browse the repository at this point in the history
First bug:
If during module deletion scenario, UnloadKmod function returns and
error for some reason, the GarbageCollectInUseLabels and UpdateNodeLabelsAndRecordEvents
onw't be called. This in turn means, that if there is a module that was
being loaded/unloaded in parallel, the actual action will happen
(load/unload from kernel), but the label in NMC and node won't be
updated.
Solution: always continue with GarbageCollectInUseLabels
and UpdateNodeLabelsAndRecordEvents and return a collective error later

Second bug:
In case ProcessModuleSpec from some spec/status has failed, the status
would not be deleted from the statusMap, which will cause it later to be
treated as orphaned status and the UnloadKmod to be called.
Solution: always delete status from statusMap, even in case of a failure
  • Loading branch information
yevgeny-shnaidman authored Oct 10, 2023
1 parent 6b25d47 commit 70d3993
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 9 deletions.
14 changes: 5 additions & 9 deletions internal/controllers/nmc_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,10 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
errs,
fmt.Errorf("error processing Module %s: %v", moduleNameKey, err),
)

continue
}

// deleting status always (even in case of an error), so that it won't be treated
// as an orphaned status later in reconciliation
delete(statusMap, moduleNameKey)
}

Expand All @@ -146,19 +146,15 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
}
}

if err := errors.Join(errs...); err != nil {
return reconcile.Result{}, fmt.Errorf("could not process orphan statuses: %v", err)
}

if err := r.helper.GarbageCollectInUseLabels(ctx, &nmcObj); err != nil {
return reconcile.Result{}, fmt.Errorf("could not garbage collect in-use labels for NMC %s: %v", req.NamespacedName, err)
errs = append(errs, fmt.Errorf("failed to GC in-use labels for NMC %s: %v", req.NamespacedName, err))
}

if err := r.helper.UpdateNodeLabelsAndRecordEvents(ctx, &nmcObj); err != nil {
return reconcile.Result{}, fmt.Errorf("could not update node's labels for NMC %s: %v", req.NamespacedName, err)
errs = append(errs, fmt.Errorf("could not update node's labels for NMC %s: %v", req.NamespacedName, err))
}

return ctrl.Result{}, nil
return ctrl.Result{}, errors.Join(errs...)
}

func (r *NMCReconciler) SetupWithManager(ctx context.Context, mgr manager.Manager) error {
Expand Down
59 changes: 59 additions & 0 deletions internal/controllers/nmc_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,65 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
BeZero(),
)
})

It("should complete all the reconcile functions and return combined error", func() {
const (
mod0Name = "mod0"
mod1Name = "mod1"
mod2Name = "mod2"
)
spec0 := kmmv1beta1.NodeModuleSpec{
ModuleItem: kmmv1beta1.ModuleItem{
Namespace: namespace,
Name: mod0Name,
},
}

status0 := kmmv1beta1.NodeModuleStatus{
ModuleItem: kmmv1beta1.ModuleItem{
Namespace: namespace,
Name: mod0Name,
},
}

status2 := kmmv1beta1.NodeModuleStatus{
ModuleItem: kmmv1beta1.ModuleItem{
Namespace: namespace,
Name: mod2Name,
},
}

nmc := &kmmv1beta1.NodeModulesConfig{
ObjectMeta: metav1.ObjectMeta{Name: nmcName},
Spec: kmmv1beta1.NodeModulesConfigSpec{
Modules: []kmmv1beta1.NodeModuleSpec{spec0},
},
Status: kmmv1beta1.NodeModulesConfigStatus{
Modules: []kmmv1beta1.NodeModuleStatus{status0, status2},
},
}

contextWithValueMatch := gomock.AssignableToTypeOf(
reflect.TypeOf((*context.Context)(nil)).Elem(),
)

gomock.InOrder(
kubeClient.
EXPECT().
Get(ctx, nmcNsn, &kmmv1beta1.NodeModulesConfig{}).
Do(func(_ context.Context, _ types.NamespacedName, kubeNmc ctrlclient.Object, _ ...ctrlclient.Options) {
*kubeNmc.(*kmmv1beta1.NodeModulesConfig) = *nmc
}),
wh.EXPECT().SyncStatus(ctx, nmc).Return(nil),
wh.EXPECT().ProcessModuleSpec(contextWithValueMatch, nmc, &spec0, &status0).Return(fmt.Errorf("some error")),
wh.EXPECT().ProcessUnconfiguredModuleStatus(contextWithValueMatch, nmc, &status2).Return(fmt.Errorf("some error")),
wh.EXPECT().GarbageCollectInUseLabels(ctx, nmc).Return(fmt.Errorf("some error")),
wh.EXPECT().UpdateNodeLabelsAndRecordEvents(ctx, nmc).Return(fmt.Errorf("some error")),
)

_, err := r.Reconcile(ctx, req)
Expect(err).ToNot(BeNil())
})
})

var moduleConfig = kmmv1beta1.ModuleConfig{
Expand Down

0 comments on commit 70d3993

Please sign in to comment.