From d3a991a397d80b4d64b6aa024e97aa3c2458aaf7 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Fri, 24 May 2024 23:17:45 +0000 Subject: [PATCH] relax validation on replicated jobs --- pkg/util/testing/wrappers.go | 8 +- pkg/webhooks/jobset_webhook.go | 8 ++ pkg/webhooks/jobset_webhook_test.go | 112 ++++++++++++++---- .../webhook/jobset_webhook_test.go | 66 +++++++---- 4 files changed, 151 insertions(+), 43 deletions(-) diff --git a/pkg/util/testing/wrappers.go b/pkg/util/testing/wrappers.go index 465d9af7d..1f50d5157 100644 --- a/pkg/util/testing/wrappers.go +++ b/pkg/util/testing/wrappers.go @@ -225,7 +225,13 @@ func (j *JobTemplateWrapper) CompletionMode(mode batchv1.CompletionMode) *JobTem return j } -// PodSpec Containers sets the pod template spec containers. +// PodTemplateSpec sets the pod template spec in a Job spec. +func (j *JobTemplateWrapper) PodTemplateSpec(podTemplateSpec corev1.PodTemplateSpec) *JobTemplateWrapper { + j.Spec.Template = podTemplateSpec + return j +} + +// PodSpec sets the pod spec in a Job template. func (j *JobTemplateWrapper) PodSpec(podSpec corev1.PodSpec) *JobTemplateWrapper { j.Spec.Template.Spec = podSpec return j diff --git a/pkg/webhooks/jobset_webhook.go b/pkg/webhooks/jobset_webhook.go index 06652755f..62ea4ce92 100644 --- a/pkg/webhooks/jobset_webhook.go +++ b/pkg/webhooks/jobset_webhook.go @@ -218,11 +218,19 @@ func (j *jobSetWebhook) ValidateUpdate(ctx context.Context, old, newObj runtime. return nil, fmt.Errorf("expected a JobSet from old object but got a %T", old) } mungedSpec := js.Spec.DeepCopy() + + // Allow pod template to be mutated for suspended JobSets. + // This is needed for integration with Kueue/DWS. if ptr.Deref(oldJS.Spec.Suspend, false) { for index := range js.Spec.ReplicatedJobs { + // Pod values which must be mutable for Kueue are defined here: https://github.com/kubernetes-sigs/kueue/blob/a50d395c36a2cb3965be5232162cf1fded1bdb08/apis/kueue/v1beta1/workload_types.go#L256-L260 + mungedSpec.ReplicatedJobs[index].Template.Spec.Template.Annotations = oldJS.Spec.ReplicatedJobs[index].Template.Spec.Template.Annotations + mungedSpec.ReplicatedJobs[index].Template.Spec.Template.Labels = oldJS.Spec.ReplicatedJobs[index].Template.Spec.Template.Labels mungedSpec.ReplicatedJobs[index].Template.Spec.Template.Spec.NodeSelector = oldJS.Spec.ReplicatedJobs[index].Template.Spec.Template.Spec.NodeSelector + mungedSpec.ReplicatedJobs[index].Template.Spec.Template.Spec.Tolerations = oldJS.Spec.ReplicatedJobs[index].Template.Spec.Template.Spec.Tolerations } } + // Note that SucccessPolicy and failurePolicy are made immutable via CEL. errs := apivalidation.ValidateImmutableField(mungedSpec.ReplicatedJobs, oldJS.Spec.ReplicatedJobs, field.NewPath("spec").Child("replicatedJobs")) errs = append(errs, apivalidation.ValidateImmutableField(mungedSpec.ManagedBy, oldJS.Spec.ManagedBy, field.NewPath("spec").Child("managedBy"))...) diff --git a/pkg/webhooks/jobset_webhook_test.go b/pkg/webhooks/jobset_webhook_test.go index 8fbed7ead..ac7c3cde2 100644 --- a/pkg/webhooks/jobset_webhook_test.go +++ b/pkg/webhooks/jobset_webhook_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -993,7 +994,7 @@ func TestValidateUpdate(t *testing.T) { }.ToAggregate(), }, { - name: "replicated jobs are immutable", + name: "replicated job pod template can be updated for suspended jobset", js: &jobset.JobSet{ ObjectMeta: validObjectMeta, Spec: jobset.JobSetSpec{ @@ -1002,17 +1003,56 @@ func TestValidateUpdate(t *testing.T) { Name: "test-jobset-replicated-job-0", Replicas: 2, Template: batchv1.JobTemplateSpec{ + // Adding an annotation. Spec: batchv1.JobSpec{ Parallelism: ptr.To[int32](2), + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"key": "value"}, + }, + }, }, }, }, + }, + }, + }, + oldJs: &jobset.JobSet{ + ObjectMeta: validObjectMeta, + Spec: jobset.JobSetSpec{ + Suspend: ptr.To(true), + ReplicatedJobs: []jobset.ReplicatedJob{ { - Name: "test-jobset-replicated-job-1", - Replicas: 1, + Name: "test-jobset-replicated-job-0", + Replicas: 2, Template: batchv1.JobTemplateSpec{ Spec: batchv1.JobSpec{ - Parallelism: ptr.To[int32](1), + Parallelism: ptr.To[int32](2), + }, + }, + }, + }, + }, + }, + }, + { + name: "replicated job pod template cannot be updated for running jobset", + js: &jobset.JobSet{ + ObjectMeta: validObjectMeta, + Spec: jobset.JobSetSpec{ + ReplicatedJobs: []jobset.ReplicatedJob{ + { + Name: "test-jobset-replicated-job-0", + Replicas: 2, + Template: batchv1.JobTemplateSpec{ + // Adding an annotation. + Spec: batchv1.JobSpec{ + Parallelism: ptr.To[int32](2), + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"key": "value"}, + }, + }, }, }, }, @@ -1022,31 +1062,60 @@ func TestValidateUpdate(t *testing.T) { oldJs: &jobset.JobSet{ ObjectMeta: validObjectMeta, Spec: jobset.JobSetSpec{ - Suspend: ptr.To(true), - ReplicatedJobs: validReplicatedJobs, + ReplicatedJobs: []jobset.ReplicatedJob{ + { + Name: "test-jobset-replicated-job-0", + Replicas: 2, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Parallelism: ptr.To[int32](2), + }, + }, + }, + }, }, }, want: field.ErrorList{ - field.Invalid(field.NewPath("spec").Child("replicatedJobs"), []jobset.ReplicatedJob{ - { - Name: "test-jobset-replicated-job-0", - Replicas: 2, - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - Parallelism: ptr.To[int32](2), + field.Invalid(field.NewPath("spec").Child("replicatedJobs"), "", "field is immutable"), + }.ToAggregate(), + }, + { + name: "replicated job name cannot be updated", + js: &jobset.JobSet{ + ObjectMeta: validObjectMeta, + Spec: jobset.JobSetSpec{ + ReplicatedJobs: []jobset.ReplicatedJob{ + { + Name: "new-replicated-job-name", + Replicas: 2, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Parallelism: ptr.To[int32](2), + }, }, }, }, - { - Name: "test-jobset-replicated-job-1", - Replicas: 1, - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - Parallelism: ptr.To[int32](1), + }, + }, + oldJs: &jobset.JobSet{ + ObjectMeta: validObjectMeta, + Spec: jobset.JobSetSpec{ + Suspend: ptr.To(true), + ReplicatedJobs: []jobset.ReplicatedJob{ + { + Name: "test-jobset-replicated-job-0", + Replicas: 2, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Parallelism: ptr.To[int32](2), + }, }, }, }, - }, "field is immutable"), + }, + }, + want: field.ErrorList{ + field.Invalid(field.NewPath("spec").Child("replicatedJobs"), "", "field is immutable"), }.ToAggregate(), }, } @@ -1059,7 +1128,8 @@ func TestValidateUpdate(t *testing.T) { newObj := tc.js.DeepCopyObject() oldObj := tc.oldJs.DeepCopyObject() _, err = webhook.ValidateUpdate(context.TODO(), oldObj, newObj) - if diff := cmp.Diff(tc.want, err); diff != "" { + // Ignore bad value to keep test cases short and readable. + if diff := cmp.Diff(tc.want, err, cmpopts.IgnoreFields(field.Error{}, "BadValue")); diff != "" { t.Errorf("ValidateResources() mismatch (-want +got):\n%s", diff) } }) diff --git a/test/integration/webhook/jobset_webhook_test.go b/test/integration/webhook/jobset_webhook_test.go index afb964a4b..0c9121f8a 100644 --- a/test/integration/webhook/jobset_webhook_test.go +++ b/test/integration/webhook/jobset_webhook_test.go @@ -73,7 +73,7 @@ var _ = ginkgo.Describe("jobset webhook defaulting", func() { updateShouldFail bool } - ginkgo.DescribeTable("defaulting on jobset creation", + ginkgo.DescribeTable("jobset webhook tests", func(tc *testCase) { ctx := context.Background() @@ -219,26 +219,6 @@ var _ = ginkgo.Describe("jobset webhook defaulting", func() { }, updateShouldFail: true, }), - ginkgo.Entry("validate jobSet immutable for fields over than NodeSelector when jobSet is suspended", &testCase{ - makeJobSet: func(ns *corev1.Namespace) *testing.JobSetWrapper { - return testing.MakeJobSet("js-hostnames-non-indexed", ns.Name). - Suspend(true). - EnableDNSHostnames(true). - ReplicatedJob(testing.MakeReplicatedJob("rjob"). - Job(testing.MakeJobTemplate("job", ns.Name). - PodSpec(testing.TestPodSpec). - CompletionMode(batchv1.IndexedCompletion).Obj()). - Obj()) - }, - defaultsApplied: func(js *jobset.JobSet) bool { - return true - }, - updateJobSet: func(js *jobset.JobSet) { - js.Spec.ReplicatedJobs[0].Template.Spec.Template.Spec.Hostname = "test" - js.Spec.ReplicatedJobs[0].Template.Spec.Template.Spec.Subdomain = "test" - }, - updateShouldFail: true, - }), ginkgo.Entry("success policy defaults to all", &testCase{ makeJobSet: func(ns *corev1.Namespace) *testing.JobSetWrapper { return testing.MakeJobSet("success-policy", ns.Name). @@ -354,5 +334,49 @@ var _ = ginkgo.Describe("jobset webhook defaulting", func() { }, updateShouldFail: true, }), + ginkgo.Entry("updating pod template in suspended jobset is allowed", &testCase{ + makeJobSet: func(ns *corev1.Namespace) *testing.JobSetWrapper { + return testing.MakeJobSet("suspended", ns.Name). + Suspend(true). + ReplicatedJob(testing.MakeReplicatedJob("rjob"). + Job(testing.MakeJobTemplate("job", ns.Name). + CompletionMode(batchv1.IndexedCompletion). + PodTemplateSpec(corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"old": "annotation"}, + }, + Spec: testing.TestPodSpec, + }).Obj()). + Obj()) + }, + defaultsApplied: func(js *jobset.JobSet) bool { + return true + }, + updateJobSet: func(js *jobset.JobSet) { + js.Spec.ReplicatedJobs[0].Template.Spec.Template.Annotations["new"] = "annotation" + }, + }), + ginkgo.Entry("updating pod template in running jobset is not allowed", &testCase{ + makeJobSet: func(ns *corev1.Namespace) *testing.JobSetWrapper { + return testing.MakeJobSet("suspended", ns.Name). + ReplicatedJob(testing.MakeReplicatedJob("rjob"). + Job(testing.MakeJobTemplate("job", ns.Name). + CompletionMode(batchv1.IndexedCompletion). + PodTemplateSpec(corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"old": "annotation"}, + }, + Spec: testing.TestPodSpec, + }).Obj()). + Obj()) + }, + defaultsApplied: func(js *jobset.JobSet) bool { + return true + }, + updateJobSet: func(js *jobset.JobSet) { + js.Spec.ReplicatedJobs[0].Template.Spec.Template.Annotations["new"] = "annotation" + }, + updateShouldFail: true, + }), ) // end of DescribeTable }) // end of Describe