From 20178b4e43d48381cd01ce5923fc26d2eeccbb1a Mon Sep 17 00:00:00 2001 From: Ed Bartosh Date: Tue, 24 Dec 2024 14:56:00 +0200 Subject: [PATCH 1/4] hack: implement job config generator Co-authored-by: Patrick Ohly --- hack/generate-jobs.py | 149 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100755 hack/generate-jobs.py diff --git a/hack/generate-jobs.py b/hack/generate-jobs.py new file mode 100755 index 000000000000..9a7b0eec1e1d --- /dev/null +++ b/hack/generate-jobs.py @@ -0,0 +1,149 @@ +# Copyright 2025 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Generate job configuration files.""" + +import argparse +import configparser +import filecmp +import os +import pathlib +import shutil +import sys +import tempfile +import typing + +import jinja2 + + +def generate(patterns: typing.List[str], + only_verify: bool, overwrite: bool) -> typing.List[str]: + """ + Generate job configuration files for all configs matching patterns. + Return list of errors; a pattern without matches is reported as one. + """ + errors = [] + for pattern in patterns: + paths = list(pathlib.Path(".").glob(pattern))  # generators are always truthy + if not paths: + errors.append(f"No files found for pattern {pattern}") + continue + for path in paths: + errs = generate_one(path, only_verify, overwrite) + if errs: + errors.extend(errs) + return errors + + +def generate_one(path: pathlib.Path, + only_verify: bool, overwrite: bool) -> typing.List[str]: + """ + Generate job configuration files from one template. + Return list of errors. 
+ """ + config = configparser.ConfigParser() + with path.open() as conf:  # close the config file once it is parsed + config.read_file(conf) + template_name = config.get("DEFAULT", "template") + template_path = path.parent / template_name + errors = [] + with template_path.open() as inp: + template = jinja2.Template(inp.read(), lstrip_blocks=True) + pairs = config.get("DEFAULT", "files").split(",") + for name, job in (pair.split(":") for pair in pairs): + tmp = tempfile.NamedTemporaryFile( + "w", + prefix=f"{template_name}.", + delete=False, + ) + with tmp: + header = ( + "# GENERATED FILE - DO NOT EDIT!\n#\n# " + f"Instead, modify {template_name} and run `make generate-jobs`.\n" + ) + for section in config.sections(): + tmp.write( + template.render( + config[section], + file=name, + job_name=job.format(section=section), + header=header, + ) + ) + header = "" + + out = template_path.parent / f"{template_path.stem}-{name}.yaml" + if not os.path.exists(out): + if only_verify: + os.unlink(tmp.name) + errors.append(f"Can't verify content: {out} doesn't exist") + continue + else: + equal = filecmp.cmp(out, tmp.name, shallow=False) + if only_verify: + os.unlink(tmp.name) + if not equal: + errors.append( + f"Generated content for {out} differs from existing" + ) + continue + if equal: + os.unlink(tmp.name) + continue + if not overwrite: + os.unlink(tmp.name) + errors.append( + f"Generated content for {out} differs from existing, " + "use --overwrite to update" + ) + continue + shutil.move(tmp.name, out) + + return errors + + +def main(argv): + """Entry point.""" + parser = argparse.ArgumentParser( + prog="Jobs Generator", + description="Generate job configuration files from templates", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "pattern", + nargs="+", + help="config path pattern in the Python pathlib pattern language format:\n" + "https://docs.python.org/3/library/pathlib.html#pattern-language,\n" + "for example: config/jobs/**/*.generate.conf", + ) + parser.add_argument( + "--only-verify", 
action="store_true", + help="Only verify if generated files are the same as existing", + ) + parser.add_argument( + "--overwrite", action="store_true", help="Overwrite output files" + ) + args = parser.parse_args(argv) + + errors = generate(args.pattern, args.only_verify, args.overwrite) + if errors: + for err in errors: + print(err, file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) From b9f1edb2cd743433069e1fb89a95541c54c8287e Mon Sep 17 00:00:00 2001 From: Ed Bartosh Date: Tue, 24 Dec 2024 14:57:03 +0200 Subject: [PATCH 2/4] sig-node: generate DRA jobs Co-authored-by: Patrick Ohly --- .../jobs/kubernetes/sig-node/dra-canary.yaml | 4 +- ...c-resource-allocation.yaml => dra-ci.yaml} | 104 +++---- .../kubernetes/sig-node/dra-presubmit.yaml | 254 ++++++++++++++++ .../kubernetes/sig-node/dra.generate.conf | 59 ++++ config/jobs/kubernetes/sig-node/dra.jinja | 129 +++++++++ .../sig-node/sig-node-presubmit.yaml | 273 ------------------ .../kubernetes/presubmits/config.yaml | 5 - 7 files changed, 490 insertions(+), 338 deletions(-) rename config/jobs/kubernetes/sig-node/{dynamic-resource-allocation.yaml => dra-ci.yaml} (82%) create mode 100644 config/jobs/kubernetes/sig-node/dra-presubmit.yaml create mode 100644 config/jobs/kubernetes/sig-node/dra.generate.conf create mode 100644 config/jobs/kubernetes/sig-node/dra.jinja diff --git a/config/jobs/kubernetes/sig-node/dra-canary.yaml b/config/jobs/kubernetes/sig-node/dra-canary.yaml index 515bf0c9482d..b14f90ad9662 100644 --- a/config/jobs/kubernetes/sig-node/dra-canary.yaml +++ b/config/jobs/kubernetes/sig-node/dra-canary.yaml @@ -95,7 +95,7 @@ presubmits: cpu: 2 memory: 9Gi - - name: pull-kubernetes-node-e2e-cgrpv1-crio-dra-canary + - name: pull-kubernetes-node-e2e-crio-cgrpv1-dra-canary cluster: k8s-infra-prow-build skip_branches: - release-\d+\.\d+ # per-release image @@ -147,7 +147,7 @@ presubmits: cpu: 2 memory: 9Gi - - name: 
pull-kubernetes-node-e2e-cgrpv2-crio-dra-canary + - name: pull-kubernetes-node-e2e-crio-cgrpv2-dra-canary cluster: k8s-infra-prow-build skip_branches: - release-\d+\.\d+ # per-release image diff --git a/config/jobs/kubernetes/sig-node/dynamic-resource-allocation.yaml b/config/jobs/kubernetes/sig-node/dra-ci.yaml similarity index 82% rename from config/jobs/kubernetes/sig-node/dynamic-resource-allocation.yaml rename to config/jobs/kubernetes/sig-node/dra-ci.yaml index 7bcf6c1c85c4..ae841fc3fae2 100644 --- a/config/jobs/kubernetes/sig-node/dynamic-resource-allocation.yaml +++ b/config/jobs/kubernetes/sig-node/dra-ci.yaml @@ -1,43 +1,43 @@ +# GENERATED FILE - DO NOT EDIT! +# +# Instead, modify dra.jinja and run `make generate-jobs`. periodics: - # This jobs runs e2e.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta) - # on a kind cluster with containerd updated to a version with CDI support. - name: ci-kind-dra cluster: eks-prow-build-cluster interval: 6h + labels: + preset-service-account: "true" + preset-dind-enabled: "true" + preset-kind-volume-mounts: "true" annotations: testgrid-dashboards: sig-node-dynamic-resource-allocation - testgrid-tab-name: ci-kind-dra description: Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind - testgrid-alert-email: patrick.ohly@intel.com + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com fork-per-release: "true" decorate: true decoration_config: - timeout: 3h - labels: - preset-service-account: "true" - preset-dind-enabled: "true" - preset-kind-volume-mounts: "true" + timeout: 90m extra_refs: - org: kubernetes repo: kubernetes base_ref: master path_alias: k8s.io/kubernetes + workdir: true spec: containers: - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master command: - runner.sh args: - - /bin/sh - - -xc - - > - make WHAT="github.com/onsi/ginkgo/v2/ginkgo 
k8s.io/kubernetes/test/e2e/e2e.test" && - curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind && - kind build node-image --image=dra/node:latest . && - trap 'kind export logs "${ARTIFACTS}/kind"; kind delete cluster' EXIT && - kind create cluster --retain --config test/e2e/dra/kind.yaml --image dra/node:latest && + - /bin/bash + - -xce + - | + make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test" + curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind + kind build node-image --image=dra/node:latest . + trap 'kind export logs "${ARTIFACTS}/kind"; kind delete cluster' EXIT + kind create cluster --retain --config test/e2e/dra/kind.yaml --image dra/node:latest KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=2h30m hack/ginkgo-e2e.sh -ginkgo.label-filter='Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky' - # docker-in-docker needs privileged mode securityContext: privileged: true @@ -49,31 +49,27 @@ periodics: cpu: 2 memory: 9Gi - # This jobs runs e2e.test with a focus on tests for the Dynamic Resource Allocation feature (currently alpha, soon beta) - # on a kind cluster with containerd updated to a version with CDI support. - # - # Compared to ci-kind-dra, this one enables all DRA-related features. 
- name: ci-kind-dra-all cluster: eks-prow-build-cluster interval: 6h + labels: + preset-service-account: "true" + preset-dind-enabled: "true" + preset-kind-volume-mounts: "true" annotations: testgrid-dashboards: sig-node-dynamic-resource-allocation - testgrid-tab-name: ci-kind-dra-all description: Runs E2E tests for Dynamic Resource Allocation alpha and beta features against a Kubernetes master cluster created with sigs.k8s.io/kind - testgrid-alert-email: patrick.ohly@intel.com + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com fork-per-release: "true" decorate: true decoration_config: - timeout: 3h - labels: - preset-service-account: "true" - preset-dind-enabled: "true" - preset-kind-volume-mounts: "true" + timeout: 90m extra_refs: - org: kubernetes repo: kubernetes base_ref: master path_alias: k8s.io/kubernetes + workdir: true spec: containers: - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master @@ -81,9 +77,8 @@ periodics: - runner.sh args: - /bin/bash - - -xc + - -xce - | - set -ex make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test" curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind kind build node-image --image=dra/node:latest . @@ -94,7 +89,6 @@ periodics: # Those additional features are not in kind.yaml, but they can be added at the end. 
kind create cluster --retain --config <(cat test/e2e/dra/kind.yaml; for feature in ${features}; do echo " ${feature}: true"; done) --image dra/node:latest KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=1h hack/ginkgo-e2e.sh -ginkgo.label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Alpha, Beta, DynamicResourceAllocation$(for feature in ${features}; do echo , ${feature}; done)} && !Flaky && !Slow" - # docker-in-docker needs privileged mode securityContext: privileged: true @@ -106,19 +100,17 @@ periodics: cpu: 2 memory: 9Gi - # This job runs e2e_node.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta) - - name: ci-node-e2e-cgrpv1-crio-dra + - name: ci-node-e2e-crio-cgrpv1-dra cluster: k8s-infra-prow-build interval: 6h - annotations: - testgrid-dashboards: sig-node-cri-o, sig-node-dynamic-resource-allocation - testgrid-tab-name: ci-node-e2e-cgrpv1-crio-dra - description: Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v1 - testgrid-alert-email: eduard.bartosh@intel.com,patrick.ohly@intel.com - fork-per-release: "true" labels: preset-service-account: "true" preset-k8s-ssh: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-cri-o + description: Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v1 + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" decorate: true decoration_config: timeout: 90m @@ -145,7 +137,7 @@ periodics: - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/crio/crio.sock --container-runtime-process-name=/usr/local/bin/crio --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd 
--cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/crio.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"crio.log\", \"journalctl\": [\"-u\", \"crio\"]}"' - --node-tests=true - --provider=gce - - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky"' + - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' - --timeout=65m - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv1-serial.yaml env: @@ -161,19 +153,17 @@ periodics: cpu: 2 memory: 9Gi - # This job runs e2e_node.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta) - - name: ci-node-e2e-cgrpv2-crio-dra + - name: ci-node-e2e-crio-cgrpv2-dra cluster: k8s-infra-prow-build interval: 6h - annotations: - testgrid-dashboards: sig-node-cri-o, sig-node-dynamic-resource-allocation - description: Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v2 - testgrid-tab-name: ci-node-e2e-cgrpv2-crio-dra - testgrid-alert-email: eduard.bartosh@intel.com,patrick.ohly@intel.com - fork-per-release: "true" labels: preset-service-account: "true" preset-k8s-ssh: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-cri-o + description: Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v2 + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" decorate: true decoration_config: timeout: 90m @@ -200,7 +190,7 @@ periodics: - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true 
--container-runtime-endpoint=unix:///var/run/crio/crio.sock --container-runtime-process-name=/usr/local/bin/crio --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/crio.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"crio.log\", \"journalctl\": [\"-u\", \"crio\"]}"' - --node-tests=true - --provider=gce - - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky"' + - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' - --timeout=65m - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv2-serial.yaml env: @@ -216,19 +206,17 @@ periodics: cpu: 2 memory: 9Gi - # This job runs the same tests as ci-node-e2e-crio-dra with Containerd 1.7 runtime - name: ci-node-e2e-containerd-1-7-dra cluster: k8s-infra-prow-build interval: 6h - annotations: - testgrid-dashboards: sig-node-dynamic-resource-allocation - testgrid-tab-name: ci-node-e2e-containerd-1-7-dra - description: Runs E2E node tests for Dynamic Resource Allocation beta features with containerd - testgrid-alert-email: eduard.bartosh@intel.com,patrick.ohly@intel.com - fork-per-release: "true" labels: preset-service-account: "true" preset-k8s-ssh: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-containerd + description: Runs E2E node tests for Dynamic Resource Allocation beta features with containerd + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" decorate: true decoration_config: timeout: 90m @@ -254,7 +242,7 @@ periodics: - '--node-test-args=--feature-gates=DynamicResourceAllocation=true 
--service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/containerd/containerd.sock --container-runtime-process-name=/usr/local/bin/containerd --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/containerd.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"containerd.log\", \"journalctl\": [\"-u\", \"containerd\"]}"' - --node-tests=true - --provider=gce - - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky"' + - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' - --timeout=65m - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/dra/image-config-containerd-1.7.yaml resources: diff --git a/config/jobs/kubernetes/sig-node/dra-presubmit.yaml b/config/jobs/kubernetes/sig-node/dra-presubmit.yaml new file mode 100644 index 000000000000..08da992453b7 --- /dev/null +++ b/config/jobs/kubernetes/sig-node/dra-presubmit.yaml @@ -0,0 +1,254 @@ +# GENERATED FILE - DO NOT EDIT! +# +# Instead, modify dra.jinja and run `make generate-jobs`. 
+presubmits: + kubernetes/kubernetes: + - name: pull-kubernetes-kind-dra + cluster: eks-prow-build-cluster + skip_branches: + - release-\d+\.\d+ # per-release image + always_run: false + run_if_changed: /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go + optional: true + labels: + preset-service-account: "true" + preset-dind-enabled: "true" + preset-kind-volume-mounts: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits + description: Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" + decorate: true + decoration_config: + timeout: 90m + path_alias: k8s.io/kubernetes + spec: + containers: + - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master + command: + - runner.sh + args: + - /bin/bash + - -xce + - | + make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test" + curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind + kind build node-image --image=dra/node:latest . 
+ trap 'kind export logs "${ARTIFACTS}/kind"; kind delete cluster' EXIT + kind create cluster --retain --config test/e2e/dra/kind.yaml --image dra/node:latest + KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=2h30m hack/ginkgo-e2e.sh -ginkgo.label-filter='Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky' + # docker-in-docker needs privileged mode + securityContext: + privileged: true + resources: + limits: + cpu: 2 + memory: 9Gi + requests: + cpu: 2 + memory: 9Gi + + - name: pull-kubernetes-kind-dra-all + cluster: eks-prow-build-cluster + skip_branches: + - release-\d+\.\d+ # per-release image + always_run: false + run_if_changed: /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go + optional: true + labels: + preset-service-account: "true" + preset-dind-enabled: "true" + preset-kind-volume-mounts: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits + description: Runs E2E tests for Dynamic Resource Allocation alpha and beta features against a Kubernetes master cluster created with sigs.k8s.io/kind + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" + decorate: true + decoration_config: + timeout: 90m + path_alias: k8s.io/kubernetes + spec: + containers: + - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master + command: + - runner.sh + args: + - /bin/bash + - -xce + - | + make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test" + curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind + kind build node-image --image=dra/node:latest . 
+ trap 'kind export logs "${ARTIFACTS}/kind"; kind delete cluster' EXIT + # Which DRA features exist can change over time. + features=( $(grep '"DRA' pkg/features/kube_features.go | sed 's/.*"\(.*\)"/\1/') ) + echo "Enabling DRA feature(s): ${features[*]}." + # Those additional features are not in kind.yaml, but they can be added at the end. + kind create cluster --retain --config <(cat test/e2e/dra/kind.yaml; for feature in ${features}; do echo " ${feature}: true"; done) --image dra/node:latest + KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=1h hack/ginkgo-e2e.sh -ginkgo.label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Alpha, Beta, DynamicResourceAllocation$(for feature in ${features}; do echo , ${feature}; done)} && !Flaky && !Slow" + # docker-in-docker needs privileged mode + securityContext: + privileged: true + resources: + limits: + cpu: 2 + memory: 9Gi + requests: + cpu: 2 + memory: 9Gi + + - name: pull-kubernetes-node-e2e-crio-cgrpv1-dra + cluster: k8s-infra-prow-build + skip_branches: + - release-\d+\.\d+ # per-release image + always_run: false + run_if_changed: (/dra/|/dynamicresources/|/resourceclaim/|/deviceclass/|/resourceslice/|/resourceclaimtemplate/|/dynamic-resource-allocation/|/pkg/apis/resource/|/api/resource/|/test/e2e_node/dra_).*\.(go|yaml) + optional: true + skip_report: false + labels: + preset-service-account: "true" + preset-k8s-ssh: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits, sig-node-cri-o + description: Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v1 + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" + decorate: true + decoration_config: + timeout: 90m + path_alias: k8s.io/kubernetes + extra_refs: + - org: kubernetes + repo: test-infra + base_ref: master + path_alias: 
k8s.io/test-infra + spec: + containers: + - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master + command: + - runner.sh + - /workspace/scenarios/kubernetes_e2e.py + args: + - --deployment=node + - --env=KUBE_SSH_USER=core + - --gcp-zone=us-west1-b + - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/crio/crio.sock --container-runtime-process-name=/usr/local/bin/crio --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/crio.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"crio.log\", \"journalctl\": [\"-u\", \"crio\"]}"' + - --node-tests=true + - --provider=gce + - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' + - --timeout=65m + - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv1-serial.yaml + env: + - name: IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE + value: "1" + - name: GOPATH + value: /go + resources: + limits: + cpu: 2 + memory: 9Gi + requests: + cpu: 2 + memory: 9Gi + + - name: pull-kubernetes-node-e2e-crio-cgrpv2-dra + cluster: k8s-infra-prow-build + skip_branches: + - release-\d+\.\d+ # per-release image + always_run: false + optional: true + skip_report: false + labels: + preset-service-account: "true" + preset-k8s-ssh: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits, sig-node-cri-o + description: Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v2 + testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" + decorate: true + decoration_config: + 
timeout: 90m + path_alias: k8s.io/kubernetes + extra_refs: + - org: kubernetes + repo: test-infra + base_ref: master + path_alias: k8s.io/test-infra + spec: + containers: + - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master + command: + - runner.sh + - /workspace/scenarios/kubernetes_e2e.py + args: + - --deployment=node + - --env=KUBE_SSH_USER=core + - --gcp-zone=us-west1-b + - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/crio/crio.sock --container-runtime-process-name=/usr/local/bin/crio --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/crio.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"crio.log\", \"journalctl\": [\"-u\", \"crio\"]}"' + - --node-tests=true + - --provider=gce + - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' + - --timeout=65m + - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv2-serial.yaml + env: + - name: IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE + value: "1" + - name: GOPATH + value: /go + resources: + limits: + cpu: 2 + memory: 9Gi + requests: + cpu: 2 + memory: 9Gi + + - name: pull-kubernetes-node-e2e-containerd-1-7-dra + cluster: k8s-infra-prow-build + skip_branches: + - release-\d+\.\d+ # per-release image + always_run: false + optional: true + skip_report: false + labels: + preset-service-account: "true" + preset-k8s-ssh: "true" + annotations: + testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits, sig-node-containerd + description: Runs E2E node tests for Dynamic Resource Allocation beta features with containerd + 
testgrid-alert-email: eduard.bartosh@intel.com, patrick.ohly@intel.com + fork-per-release: "true" + decorate: true + decoration_config: + timeout: 90m + path_alias: k8s.io/kubernetes + extra_refs: + - org: kubernetes + repo: test-infra + base_ref: master + path_alias: k8s.io/test-infra + spec: + containers: + - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master + command: + - runner.sh + - /workspace/scenarios/kubernetes_e2e.py + args: + - --deployment=node + - --gcp-zone=us-west1-b + - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/containerd/containerd.sock --container-runtime-process-name=/usr/local/bin/containerd --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/containerd.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"containerd.log\", \"journalctl\": [\"-u\", \"containerd\"]}"' + - --node-tests=true + - --provider=gce + - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' + - --timeout=65m + - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/dra/image-config-containerd-1.7.yaml + resources: + limits: + cpu: 2 + memory: 9Gi + requests: + cpu: 2 + memory: 9Gi diff --git a/config/jobs/kubernetes/sig-node/dra.generate.conf b/config/jobs/kubernetes/sig-node/dra.generate.conf new file mode 100644 index 000000000000..ad14c0a4ecd9 --- /dev/null +++ b/config/jobs/kubernetes/sig-node/dra.generate.conf @@ -0,0 +1,59 @@ +[DEFAULT] +# `template` is a mandatory field that specifies the job template to use +template = dra.jinja +# `files` is a mandatory comma separated list of pairs `<file>:<job name format>` which +# specifies which YAML files and job 
names need to be generated. +# The name of each generated file is `<template name without extension>-<file>.yaml`, for example dra-canary.yaml. +# The name of each generated job is made by substituting the `{section}` placeholder with the section name, e.g. ci-kind-dra. +files = canary:pull-kubernetes-{section}-canary,presubmit:pull-kubernetes-{section},ci:ci-{section} +# k8s-infra-prow-build is required for node jobs +# attempt to run node jobs on eks-prow-build-cluster fails +# with "boskos failed to acquire project: resources not found" error +cluster = k8s-infra-prow-build +interval = 6h +testgrid_dashboards = sig-node-dynamic-resource-allocation +testgrid_alert_email = eduard.bartosh@intel.com, patrick.ohly@intel.com +timeout = 90m +label_filter = Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow + +# This job runs e2e.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta) +# on a kind cluster with containerd updated to a version with CDI support. +[kind-dra] +description = Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind +use_dind = true +cluster = eks-prow-build-cluster +run_if_changed = /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go + +# This job runs e2e.test with a focus on tests for the Dynamic Resource Allocation feature (currently alpha, soon beta) +# on a kind cluster with containerd updated to a version with CDI support. +# +# Compared to ci-kind-dra, this one enables all DRA-related features. 
+[kind-dra-all] +description = Runs E2E tests for Dynamic Resource Allocation alpha and beta features against a Kubernetes master cluster created with sigs.k8s.io/kind +cluster = eks-prow-build-cluster +all_features = true +use_dind = true +run_if_changed = /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go + +# This job runs e2e_node.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta) +[node-e2e-crio-cgrpv1-dra] +job_type = node +description = Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v1 +image_config_file = /home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv1-serial.yaml +inject_ssh_public_key = true +# Automatically testing with one container runtime in one configuration is sufficient to detect basic problems in kubelet early. +# CRI-O was picked because it was solid for testing so far. 
+run_if_changed = (/dra/|/dynamicresources/|/resourceclaim/|/deviceclass/|/resourceslice/|/resourceclaimtemplate/|/dynamic-resource-allocation/|/pkg/apis/resource/|/api/resource/|/test/e2e_node/dra_).*\.(go|yaml) + +# This job is the same as ci-node-e2e-crio-cgrpv1-dra, but for cgroup v2 +[node-e2e-crio-cgrpv2-dra] +job_type = node +description = Runs E2E node tests for Dynamic Resource Allocation beta features with CRI-O using cgroup v2 +image_config_file = /home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv2-serial.yaml +inject_ssh_public_key = true + +# This job runs the same tests as the ci-node-e2e-crio-*-dra jobs, but with the containerd 1.7 runtime +[node-e2e-containerd-1-7-dra] +job_type = node +description = Runs E2E node tests for Dynamic Resource Allocation beta features with containerd +image_config_file = /home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/dra/image-config-containerd-1.7.yaml diff --git a/config/jobs/kubernetes/sig-node/dra.jinja b/config/jobs/kubernetes/sig-node/dra.jinja new file mode 100644 index 000000000000..2252fca91b18 --- /dev/null +++ b/config/jobs/kubernetes/sig-node/dra.jinja @@ -0,0 +1,129 @@ +{%- if header %}{{header}} +{%- if file == "ci" %}periodics: +{%- else %}presubmits: + kubernetes/kubernetes: +{%- endif %} +{%- endif %} +{%- if file != "ci" %} +{%- set testgrid_dashboards = testgrid_dashboards + ", sig-node-presubmits" %} +{%- endif %} +{%- if "crio" in job_name %} +{%- set testgrid_dashboards = testgrid_dashboards + ", sig-node-cri-o" %} +{%- set runtime = "crio" %} +{%- endif %} +{%- if "containerd" in job_name %} +{%- set testgrid_dashboards = testgrid_dashboards + ", sig-node-containerd" %} +{%- set runtime = "containerd" %} +{%- endif %} + - name: {{job_name}} + cluster: {{cluster}} + {%- if file == "ci" %} + interval: {{interval}} + {%- else %} + skip_branches: + - release-\d+\.\d+ # per-release image + always_run: false + {%- if run_if_changed and file == "presubmit" %} + run_if_changed: 
{{run_if_changed}} + {%- endif %} + optional: true + {%- if job_type == "node" %} + skip_report: false + {%- endif %} + {%- endif %} + labels: + preset-service-account: "true" + {%- if use_dind == "true" %} + preset-dind-enabled: "true" + preset-kind-volume-mounts: "true" + {%- endif %} + {%- if job_type == "node" %} + preset-k8s-ssh: "true" + {%- endif %} + annotations: + testgrid-dashboards: {{testgrid_dashboards}} + description: {{description}} + testgrid-alert-email: {{testgrid_alert_email}} + {%- if file != "canary" %} + fork-per-release: "true" + {%- endif %} + decorate: true + decoration_config: + timeout: {{timeout}} + {%- if file == "ci" %} + extra_refs: + - org: kubernetes + repo: kubernetes + base_ref: master + path_alias: k8s.io/kubernetes + workdir: true + {%- else %} + path_alias: k8s.io/kubernetes + {%- endif %} + {%- if job_type == "node" %} + {%- if file != "ci" %} + extra_refs: + {%- endif %} + - org: kubernetes + repo: test-infra + base_ref: master + path_alias: k8s.io/test-infra + {%- endif %} + spec: + containers: + - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master + command: + - runner.sh + {%- if job_type == "node" %} + - /workspace/scenarios/kubernetes_e2e.py + args: + - --deployment=node + {%- if inject_ssh_public_key == "true" %} + - --env=KUBE_SSH_USER=core + {%- endif %} + - --gcp-zone=us-west1-b + - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/{{runtime}}/{{runtime}}.sock --container-runtime-process-name=/usr/local/bin/{{runtime}} --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/{{runtime}}.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"{{runtime}}.log\", \"journalctl\": [\"-u\", \"{{runtime}}\"]}"' + - --node-tests=true + - 
--provider=gce + - '--test_args=--timeout=1h --label-filter="{{label_filter}}"' + - --timeout=65m + - --node-args=--image-config-file={{image_config_file}} + {%- if inject_ssh_public_key == "true" %} + env: + - name: IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE + value: "1" + - name: GOPATH + value: /go + {%- endif %} + {%- else %} + args: + - /bin/bash + - -xce + - | + make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test" + curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind + kind build node-image --image=dra/node:latest . + trap 'kind export logs "${ARTIFACTS}/kind"; kind delete cluster' EXIT + {%- if all_features == "true" %} + # Which DRA features exist can change over time. + features=( $(grep '"DRA' pkg/features/kube_features.go | sed 's/.*"\(.*\)"/\1/') ) + echo "Enabling DRA feature(s): ${features[*]}." + # Those additional features are not in kind.yaml, but they can be added at the end. + kind create cluster --retain --config <(cat test/e2e/dra/kind.yaml; for feature in "${features[@]}"; do echo " ${feature}: true"; done) --image dra/node:latest + KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=1h hack/ginkgo-e2e.sh -ginkgo.label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Alpha, Beta, DynamicResourceAllocation$(for feature in "${features[@]}"; do echo , ${feature}; done)} && !Flaky && !Slow" + {%- else %} + kind create cluster --retain --config test/e2e/dra/kind.yaml --image dra/node:latest + KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=2h30m hack/ginkgo-e2e.sh -ginkgo.label-filter='Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky' + {%- endif %} + # docker-in-docker needs privileged mode + securityContext: + privileged: true + {%- endif %} + resources: 
+ limits: + cpu: 2 + memory: 9Gi + requests: + cpu: 2 + memory: 9Gi + diff --git a/config/jobs/kubernetes/sig-node/sig-node-presubmit.yaml b/config/jobs/kubernetes/sig-node/sig-node-presubmit.yaml index d9c42f4caaf5..68b55d4315bd 100644 --- a/config/jobs/kubernetes/sig-node/sig-node-presubmit.yaml +++ b/config/jobs/kubernetes/sig-node/sig-node-presubmit.yaml @@ -2880,121 +2880,6 @@ presubmits: cpu: 4 memory: 6Gi - # This jobs runs e2e.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta) - # on a kind cluster with containerd updated to a version with CDI support. - - name: pull-kubernetes-kind-dra - cluster: k8s-infra-prow-build - skip_branches: - - release-\d+\.\d+ # per-release image - annotations: - testgrid-dashboards: sig-node-presubmits, sig-node-dynamic-resource-allocation - testgrid-tab-name: pr-kind-dra - decorate: true - path_alias: k8s.io/kubernetes - # Not relevant for most PRs. - always_run: false - # This covers most of the code related to dynamic resource allocation. - # Periodic variant: ci-kind-dra - run_if_changed: /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go - optional: true - decoration_config: - timeout: 90m - labels: - preset-service-account: "true" - preset-dind-enabled: "true" - preset-kind-volume-mounts: "true" - spec: - containers: - - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master - command: - - runner.sh - args: - - /bin/sh - - -xc - - > - make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test" && - curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind && - kind build node-image --image=dra/node:latest . 
&& - trap 'kind export logs "${ARTIFACTS}/kind"; kind delete cluster' EXIT && - kind create cluster --retain --config test/e2e/dra/kind.yaml --image dra/node:latest && - KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=1h hack/ginkgo-e2e.sh -ginkgo.label-filter='Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow' - - # docker-in-docker needs privileged mode - securityContext: - privileged: true - resources: - requests: - # these are both a bit below peak usage during build - # this is mostly for building kubernetes - memory: "9000Mi" - # during the tests more like 3-20m is used - cpu: 2000m - limits: - memory: "9000Mi" - cpu: 2000m - - # This jobs runs e2e.test with a focus on tests for the Dynamic Resource Allocation feature (partly alpha, partly beta) - # on a kind cluster with containerd updated to a version with CDI support. - # - # Compared to pull-kubernetes-dra, this one enables all DRA-related features. - - name: pull-kubernetes-kind-dra-all - cluster: k8s-infra-prow-build - skip_branches: - - release-\d+\.\d+ # per-release image - annotations: - testgrid-dashboards: sig-node-presubmits, sig-node-dynamic-resource-allocation - testgrid-tab-name: pr-kind-dra-all - decorate: true - path_alias: k8s.io/kubernetes - # Not relevant for most PRs. - always_run: false - # This covers most of the code related to dynamic resource allocation. - # Periodic variant: ci-kind-dra-all - run_if_changed: /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go - # The tests might still be flaky or this job might get triggered accidentally for - # an unrelated PR. 
- optional: true - decoration_config: - timeout: 90m - labels: - preset-service-account: "true" - preset-dind-enabled: "true" - preset-kind-volume-mounts: "true" - spec: - containers: - - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master - command: - - runner.sh - args: - - /bin/bash - - -xc - - | - set -ex - make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test" - curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind - kind build node-image --image=dra/node:latest . - trap 'kind export logs "${ARTIFACTS}/kind"; kind delete cluster' EXIT - # Which DRA features exist depends on the PR that is being tested. - features=( $(grep '"DRA' pkg/features/kube_features.go | sed 's/.*"\(.*\)"/\1/') ) - echo "Enabling DRA feature(s): ${features[*]}." - # Those additional features are not in kind.yaml, but they can be added at the end. - kind create cluster --retain --config <(cat test/e2e/dra/kind.yaml; for feature in ${features}; do echo " ${feature}: true"; done) --image dra/node:latest - KUBERNETES_PROVIDER=local KUBECONFIG=${HOME}/.kube/config GINKGO_PARALLEL_NODES=8 E2E_REPORT_DIR=${ARTIFACTS} GINKGO_TIMEOUT=1h hack/ginkgo-e2e.sh -ginkgo.label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Alpha, Beta, DynamicResourceAllocation$(for feature in ${features}; do echo , ${feature}; done)} && !Flaky && !Slow" - - # docker-in-docker needs privileged mode - securityContext: - privileged: true - resources: - requests: - # these are both a bit below peak usage during build - # this is mostly for building kubernetes - memory: "9000Mi" - # during the tests more like 3-20m is used - cpu: 2000m - limits: - memory: "9000Mi" - cpu: 2000m - - name: pull-kubernetes-e2e-gce-kubelet-credential-provider cluster: k8s-infra-prow-build always_run: false @@ -3867,61 +3752,6 @@ presubmits: cpu: 4 memory: 6Gi - - name: pull-kubernetes-node-e2e-crio-cgrpv1-dra - cluster: 
k8s-infra-prow-build - skip_branches: - - release-\d+\.\d+ # per-release image - always_run: false - # Automatically testing with one container runtime in one configuration is sufficient to detect basic problems in kubelet early. - # CRI-O was picked because it was solid for testing so far. - # Periodic variant: ci-node-e2e-crio-cgrpv1-dra-features - run_if_changed: (/dra/|/dynamicresources/|/resourceclaim/|/deviceclass/|/resourceslice/|/resourceclaimtemplate/|/dynamic-resource-allocation/|/pkg/apis/resource/|/api/resource/|/test/e2e_node/dra_).*\.(go|yaml) - optional: true - skip_report: false - labels: - preset-service-account: "true" - preset-k8s-ssh: "true" - preset-pull-kubernetes-e2e: "true" - preset-pull-kubernetes-e2e-gce: "true" - annotations: - testgrid-dashboards: sig-node-cri-o, sig-node-presubmits, sig-node-dynamic-resource-allocation - testgrid-tab-name: pr-node-kubelet-crio-cgrpv1-dra - decorate: true - decoration_config: - timeout: 90m - path_alias: k8s.io/kubernetes - extra_refs: - - org: kubernetes - repo: test-infra - base_ref: master - path_alias: k8s.io/test-infra - spec: - containers: - - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master - command: - - runner.sh - - /workspace/scenarios/kubernetes_e2e.py - args: - - --deployment=node - - --env=KUBE_SSH_USER=core - - --gcp-zone=us-west1-b - - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/crio/crio.sock --container-runtime-process-name=/usr/local/bin/crio --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/crio.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"crio.log\", \"journalctl\": [\"-u\", \"crio\"]}"' - - --node-tests=true - - --provider=gce - - '--test_args=--timeout=1h 
--label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' - - --timeout=65m - - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv1-serial.yaml - env: - - name: IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE - value: "1" - resources: - requests: - cpu: 4 - memory: 6Gi - limits: - cpu: 4 - memory: 6Gi - - name: pull-kubernetes-node-e2e-crio-cgrpv1-dra-kubetest2 # experimental alternative to pull-kubernetes-node-e2e-crio-cgrpv1-dra cluster: k8s-infra-prow-build # explicitly needs /test pull-kubernetes-node-e2e-crio-cgrpv1-dra-kubetest2 to run @@ -3978,61 +3808,6 @@ presubmits: - name: IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE value: "1" - - name: pull-kubernetes-node-e2e-crio-cgrpv2-dra - cluster: k8s-infra-prow-build - skip_branches: - - release-\d+\.\d+ # per-release image - always_run: false - # Automatically testing with one container runtime in one configuration is sufficient to detect basic problems in kubelet early. - # CRI-O was picked because it was solid for testing so far. 
- # Periodic variant: ci-node-e2e-cgrpv2-crio-dra - # run_if_changed: (/dra/|/dynamicresources/|/resourceclaim/|/deviceclass/|/resourceslice/|/resourceclaimtemplate/|/dynamic-resource-allocation/|/pkg/apis/resource/|/api/resource/|/test/e2e_node/dra_).*\.(go|yaml) - optional: true - skip_report: false - labels: - preset-service-account: "true" - preset-k8s-ssh: "true" - preset-pull-kubernetes-e2e: "true" - preset-pull-kubernetes-e2e-gce: "true" - annotations: - testgrid-dashboards: sig-node-cri-o, sig-node-presubmits, sig-node-dynamic-resource-allocation - testgrid-tab-name: pr-node-kubelet-crio-cgrpv2-dra - decorate: true - decoration_config: - timeout: 90m - path_alias: k8s.io/kubernetes - extra_refs: - - org: kubernetes - repo: test-infra - base_ref: master - path_alias: k8s.io/test-infra - spec: - containers: - - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master - command: - - runner.sh - - /workspace/scenarios/kubernetes_e2e.py - args: - - --deployment=node - - --env=KUBE_SSH_USER=core - - --gcp-zone=us-west1-b - - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true --container-runtime-endpoint=unix:///var/run/crio/crio.sock --container-runtime-process-name=/usr/local/bin/crio --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/crio.service --kubelet-cgroups=/system.slice/kubelet.service" --extra-log="{\"name\": \"crio.log\", \"journalctl\": [\"-u\", \"crio\"]}"' - - --node-tests=true - - --provider=gce - - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' - - --timeout=65m - - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/crio/latest/image-config-cgroupv2-serial.yaml - env: - - name: 
IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE - value: "1" - resources: - requests: - cpu: 4 - memory: 6Gi - limits: - cpu: 4 - memory: 6Gi - - name: pull-kubernetes-node-e2e-crio-cgrpv2-dra-kubetest2 # experimental alternative to pull-kubernetes-node-e2e-crio-cgrpv2-dra cluster: k8s-infra-prow-build # explicitly needs /test pull-kubernetes-node-e2e-crio-cgrpv2-dra-kubetest2 to run @@ -4087,54 +3862,6 @@ presubmits: value: core - name: IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE value: "1" - - name: pull-kubernetes-node-e2e-containerd-1-7-dra - cluster: k8s-infra-prow-build - skip_branches: - - release-\d+\.\d+ # per-release image - always_run: false - # Automatically testing with one container runtime in one configuration is sufficient to detect basic problems in kubelet early. - # CRI-O was picked because it was solid for testing so far. - # Periodic variant: ci-node-e2e-containerd-1-7-dra - # run_if_changed: (/dra/|/dynamicresources/|/resourceclaim/|/deviceclass/|/resourceslice/|/resourceclaimtemplate/|/dynamic-resource-allocation/|/pkg/apis/resource/|/api/resource/|/test/e2e_node/dra_).*\.(go|yaml) - optional: true - skip_report: false - labels: - preset-service-account: "true" - preset-k8s-ssh: "true" - annotations: - testgrid-dashboards: sig-node-presubmits, sig-node-dynamic-resource-allocation - testgrid-tab-name: pr-node-kubelet-containerd-dra - decorate: true - decoration_config: - timeout: 90m - path_alias: k8s.io/kubernetes - extra_refs: - - org: kubernetes - repo: test-infra - base_ref: master - path_alias: k8s.io/test-infra - spec: - containers: - - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20241230-3006692a6f-master - command: - - runner.sh - - /workspace/scenarios/kubernetes_e2e.py - args: - - --deployment=node - - --gcp-zone=us-west1-b - - '--node-test-args=--feature-gates=DynamicResourceAllocation=true --service-feature-gates=DynamicResourceAllocation=true --runtime-config=api/beta=true 
--container-runtime-endpoint=unix:///run/containerd/containerd.sock --container-runtime-process-name=/usr/bin/containerd --container-runtime-pid-file= --kubelet-flags="--cgroup-driver=systemd --cgroups-per-qos=true --cgroup-root=/ --runtime-cgroups=/system.slice/containerd.service" --extra-log="{\"name\": \"containerd.log\", \"journalctl\": [\"-u\", \"containerd\"]}"' - - --node-tests=true - - --provider=gce - - '--test_args=--timeout=1h --label-filter="Feature: containsAny DynamicResourceAllocation && Feature: isSubsetOf { Beta, DynamicResourceAllocation } && !Flaky && !Slow"' - - --timeout=65m - - --node-args=--image-config-file=/home/prow/go/src/k8s.io/test-infra/jobs/e2e_node/dra/image-config-containerd-1.7.yaml - resources: - requests: - cpu: 4 - memory: 6Gi - limits: - cpu: 4 - memory: 6Gi - name: pull-kubernetes-node-e2e-resource-health-status cluster: k8s-infra-prow-build diff --git a/config/testgrids/kubernetes/presubmits/config.yaml b/config/testgrids/kubernetes/presubmits/config.yaml index dfa07f74e6bf..643ea1215bdc 100644 --- a/config/testgrids/kubernetes/presubmits/config.yaml +++ b/config/testgrids/kubernetes/presubmits/config.yaml @@ -140,11 +140,6 @@ dashboards: - name: pull-kubernetes-node-e2e-containerd-features-kubetest2 test_group_name: pull-kubernetes-node-e2e-containerd-features-kubetest2 base_options: width=10 - - name: pull-kubernetes-kind-dra - test_group_name: pull-kubernetes-kind-dra - base_options: width=10 - alert_options: - alert_mail_to_addresses: patrick.ohly@intel.com - name: presubmits-kubernetes-scalability - name: presubmits-misc - name: presubmits-node-problem-detector From da0ad99ae765bfe42b556185ba33f625736dec01 Mon Sep 17 00:00:00 2001 From: Ed Bartosh Date: Wed, 25 Dec 2024 17:58:30 +0200 Subject: [PATCH 3/4] Makefile: generate DRA jobs Co-authored-by: Patrick Ohly --- Makefile | 3 +++ hack/make-rules/update/generated-jobs.sh | 26 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100755 
hack/make-rules/update/generated-jobs.sh diff --git a/Makefile b/Makefile index 7e9f2f668c91..367f1884b687 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,9 @@ clean: # update generated code #generate: # hack/make-rules/update/generated.sh +.PHONY: generate-jobs +generate-jobs: + hack/make-rules/update/generated-jobs.sh # gofmt #gofmt: # hack/make-rules/update/gofmt.sh diff --git a/hack/make-rules/update/generated-jobs.sh b/hack/make-rules/update/generated-jobs.sh new file mode 100755 index 000000000000..05f4e3017f4a --- /dev/null +++ b/hack/make-rules/update/generated-jobs.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2024 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -o errexit +set -o nounset +set -o pipefail + +echo "Installing requirements3.txt" +hack/run-in-python-container.sh \ + pip3 install -r requirements3.txt + +echo "Generate jobs" +hack/run-in-python-container.sh \ + python3 hack/generate-jobs.py config/jobs/kubernetes/sig-node/*.conf --overwrite From bdac7d119137f588d4b4402004717a98550608c2 Mon Sep 17 00:00:00 2001 From: Ed Bartosh Date: Tue, 24 Dec 2024 14:58:23 +0200 Subject: [PATCH 4/4] Makefile: verify generated jobs Co-authored-by: Patrick Ohly --- Makefile | 3 +++ hack/make-rules/update/generated-jobs.sh | 2 +- hack/make-rules/verify/all.sh | 6 ++++++ hack/make-rules/verify/generated-jobs.sh | 26 ++++++++++++++++++++++++ requirements3.txt | 1 + 5 files changed, 37 insertions(+), 1 deletion(-) create mode 100755 hack/make-rules/verify/generated-jobs.sh diff --git a/Makefile b/Makefile index 367f1884b687..0c1b34f706a1 100644 --- a/Makefile +++ b/Makefile @@ -129,4 +129,7 @@ verify-boilerplate: .PHONY: verify-yamllint verify-yamllint: hack/make-rules/verify/yamllint.sh +.PHONY: verify-generated-jobs +verify-generated-jobs: + hack/make-rules/verify/generated-jobs.sh ################################################################################# diff --git a/hack/make-rules/update/generated-jobs.sh b/hack/make-rules/update/generated-jobs.sh index 05f4e3017f4a..812eaa029492 100755 --- a/hack/make-rules/update/generated-jobs.sh +++ b/hack/make-rules/update/generated-jobs.sh @@ -23,4 +23,4 @@ hack/run-in-python-container.sh \ echo "Generate jobs" hack/run-in-python-container.sh \ - python3 hack/generate-jobs.py config/jobs/kubernetes/sig-node/*.conf --overwrite + python3 hack/generate-jobs.py 'config/jobs/**/*.generate.conf' --overwrite diff --git a/hack/make-rules/verify/all.sh b/hack/make-rules/verify/all.sh index ef1181bbc94b..2d16b4542e91 100755 --- a/hack/make-rules/verify/all.sh +++ b/hack/make-rules/verify/all.sh @@ -92,6 +92,12 @@ if [[ "${VERIFY_GO_DEPS:-true}" == "true" ]]; then 
hack/make-rules/verify/go-deps.sh || { FAILED+=($name); echo "ERROR: $name failed"; } cd "${REPO_ROOT}" fi +if [[ "${VERIFY_GENERATED_JOBS:-true}" == "true" ]]; then + name="generated jobs" + echo "verifying $name" + hack/make-rules/verify/generated-jobs.sh || { FAILED+=($name); echo "ERROR: $name failed"; } + cd "${REPO_ROOT}" +fi # exit based on verify scripts if [[ "${#FAILED[@]}" == 0 ]]; then diff --git a/hack/make-rules/verify/generated-jobs.sh b/hack/make-rules/verify/generated-jobs.sh new file mode 100755 index 000000000000..085025586d68 --- /dev/null +++ b/hack/make-rules/verify/generated-jobs.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2024 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o nounset +set -o pipefail + +echo "Installing requirements3.txt" +hack/run-in-python-container.sh \ + pip3 install -r requirements3.txt + +echo "Verifying generated jobs" +hack/run-in-python-container.sh \ + python3 hack/generate-jobs.py config/jobs/**/*.generate.conf --only-verify diff --git a/requirements3.txt b/requirements3.txt index f175e07cf907..6b257b702ad4 100644 --- a/requirements3.txt +++ b/requirements3.txt @@ -3,6 +3,7 @@ backports.functools_lru_cache==1.6.1 configparser==4.0.2 chardet==4.0.0 isort==4.3.21 +Jinja2==3.1.5 pylint==2.4.4 parameterized==0.7.4 PyYAML==5.3