Merge remote-tracking branch 'upstream/main' into main
* upstream/main:
  Improve Small Benchmark Accuracy (SciTools#4636)
  Sperf & Cperf Benchmarks (SciTools#4621)
  Votable Issues (SciTools#4617)
  Overnight benchmarks - find a valid issue assignee (SciTools#4627)
  Updated environment lockfiles (SciTools#4624)
  Scalar Scatter Plot (SciTools#4616)
  Remove no_clobber task from Refresh lockfiles Action (SciTools#4618)
  purge deploy key (SciTools#4615)
  Overnight benchmarks remove ambiguity between file and commit names. (SciTools#4620)
  Final offline benchmark migration (SciTools#4562)
tkknight committed Mar 13, 2022
2 parents 61a94aa + 3ee7c56 commit d2c45b1
Showing 47 changed files with 2,319 additions and 211 deletions.
1 change: 0 additions & 1 deletion .github/deploy_key.scitools-docs.enc

This file was deleted.

18 changes: 14 additions & 4 deletions .github/workflows/benchmark.yml
@@ -79,12 +79,22 @@ jobs:
cd benchmarks/.asv/performance-shifts
for commit_file in *
do
-  pr_number=$(git log "$commit_file"^! --oneline | grep -o "#[0-9]*" | tail -1 | cut -c 2-)
-  assignee=$(gh pr view $pr_number --json author -q '.["author"]["login"]' --repo $GITHUB_REPOSITORY)
-  title="Performance Shift(s): \`$commit_file\`"
+  commit="${commit_file%.*}"
+  pr_number=$(git log "$commit"^! --oneline | grep -o "#[0-9]*" | tail -1 | cut -c 2-)
+  author=$(gh pr view $pr_number --json author -q '.["author"]["login"]' --repo $GITHUB_REPOSITORY)
+  merger=$(gh pr view $pr_number --json mergedBy -q '.["mergedBy"]["login"]' --repo $GITHUB_REPOSITORY)
+  # Find a valid assignee from author/merger/nothing.
+  if curl -s https://api.github.com/users/$author | grep -q "login"; then
+    assignee=$author
+  elif curl -s https://api.github.com/users/$merger | grep -q "login"; then
+    assignee=$merger
+  else
+    assignee=""
+  fi
+  title="Performance Shift(s): \`$commit\`"
  body="
  Benchmark comparison has identified performance shifts at commit \
-  $commit_file (#$pr_number). Please review the report below and \
+  $commit (#$pr_number). Please review the report below and \
  take corrective/congratulatory action as appropriate \
  :slightly_smiling_face:
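
As an aside, the same "find a valid assignee" existence check rendered in Python (an editorial sketch, not part of the commit; it assumes the third-party `requests` package, and `valid_assignee` is a name invented here):

    import requests

    def valid_assignee(author: str, merger: str) -> str:
        """Return the first of author/merger that is a live GitHub account, else ""."""
        for candidate in (author, merger):
            resp = requests.get(f"https://api.github.com/users/{candidate}")
            # A 200 response carrying a "login" field means the account exists.
            if resp.ok and "login" in resp.json():
                return candidate
        return ""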
43 changes: 3 additions & 40 deletions .github/workflows/refresh-lockfiles.yml
@@ -2,7 +2,7 @@
# available packages and dependencies.
#
# Environment specifications are given as conda environment.yml files found in
-# `requirements/ci/py**.yml`. These state the pacakges required, the conda channels
+# `requirements/ci/py**.yml`. These state the packages required, the conda channels
# that the packages will be pulled from, and any versions of packages that need to be
# pinned at specific versions.
#
@@ -14,12 +14,6 @@ name: Refresh Lockfiles

on:
  workflow_dispatch:
-    inputs:
-      clobber:
-        description: |
-          Force the workflow to run, potentially clobbering any commits already made to the branch.
-          Enter "yes" or "true" to run.
-        default: "no"
  schedule:
    # Run once a week on a Saturday night
    # N.B. "should" be quoted, according to
@@ -28,38 +22,6 @@ on:


jobs:

-  no_clobber:
-    if: "github.repository == 'SciTools/iris'"
-    runs-on: ubuntu-latest
-    steps:
-      # check if the auto-update-lockfiles branch exists. If it does, and someone other than
-      # the lockfile bot has made the head commit, abort the workflow.
-      # This job can be manually overridden by running directly from the github actions panel
-      # (known as a "workflow_dispatch") and setting the `clobber` input to "yes".
-      - uses: actions/github-script@v6
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            if (context.eventName == "workflow_dispatch") {
-              const clobber = context.payload.inputs.clobber || "no";
-              if (["yes", "true", "y"].includes(clobber.trim().toLowerCase())) {
-                core.info("Manual override, continuing workflow, potentially overwriting previous commits to auto-update-lockfiles");
-                return
-              }
-            }
-            github.repos.getBranch({...context.repo, branch: "auto-update-lockfiles"}).then(res => {
-              const committer = res.data.commit.commit.committer;
-              if (committer && committer.name === "Lockfile bot") {
-                core.info("Lockfile bot was the last to push to auto-update-lockfiles. Continue.");
-              } else {
-                core.setFailed("New commits to auto-update-lockfiles since bot last ran. Abort!");
-              }
-            }).catch(err => {
-              if (err.status === 404) {
-                core.info("auto-update-lockfiles branch not found, continue");
-              }
-            })

  gen_lockfiles:
    # this is a matrix job: it splits to create new lockfiles for each
@@ -69,7 +31,6 @@ jobs:
    # ref: https://tomasvotruba.com/blog/2020/11/16/how-to-make-dynamic-matrix-in-github-actions/
    if: "github.repository == 'SciTools/iris'"
    runs-on: ubuntu-latest
-    needs: no_clobber

    strategy:
      matrix:
@@ -121,6 +82,8 @@ jobs:
          title: "[iris.ci] environment lockfiles auto-update"
          body: |
            Lockfiles updated to the latest resolvable environment.
+            If the CI test suite fails, create a new branch based off this pull request and add the required fixes to that branch.
          labels: |
            New: Pull Request
            Bot
23 changes: 20 additions & 3 deletions benchmarks/README.md
@@ -21,13 +21,20 @@ automated overnight run locally. See the session docstring for detail.

### Environment variables

-* ``DATA_GEN_PYTHON`` - required - path to a Python executable that can be
+* `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use
+  `iris-test-data` content, and your local `site.cfg` is not available for
+  benchmark scripts.
+* `DATA_GEN_PYTHON` - required - path to a Python executable that can be
  used to generate benchmark test objects/files; see
  [Data generation](#data-generation). The Nox session sets this automatically,
  but will defer to any value already set in the shell.
-* ``BENCHMARK_DATA`` - optional - path to a directory for benchmark synthetic
+* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
  test data, which the benchmark scripts will create if it doesn't already
-  exist. Defaults to ``<root>/benchmarks/.data/`` if not set.
+  exist. Defaults to `<root>/benchmarks/.data/` if not set.
+* `ON_DEMAND_BENCHMARKS` - optional - when set (to any value): benchmarks
+  decorated with `@on_demand_benchmark` are included in the ASV run. Usually
+  coupled with the ASV `--bench` argument to only run the benchmark(s) of
+  interest. Is set during the Nox `cperf` and `sperf` sessions.

## Writing benchmarks

@@ -65,6 +72,16 @@ be significantly larger (e.g. a 1000x1000 `Cube`). Performance differences
might only be seen for the larger value, or the smaller, or both, getting you
closer to the root cause.

### On-demand benchmarks

Some benchmarks provide useful insight but are inappropriate to be included in
a benchmark run by default, e.g. those with long run-times or requiring a local
file. These benchmarks should be decorated with `@on_demand_benchmark`
(see [benchmarks init](./benchmarks/__init__.py)), which
sets the benchmark to only be included in a run when the `ON_DEMAND_BENCHMARKS`
environment variable is set. Examples include the CPerf and SPerf benchmark
suites for the UK Met Office NG-VAT project.
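
For illustration only - this example is not part of the committed README, and
the class/method names are hypothetical:

    from . import on_demand_benchmark

    @on_demand_benchmark
    class LocalFileBenchmark:
        # Only collected by ASV when ON_DEMAND_BENCHMARKS is set.
        def time_load_local_file(self):
            ...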

## Benchmark environments

We have disabled ASV's standard environment management, instead using an
1 change: 1 addition & 0 deletions benchmarks/asv.conf.json
@@ -5,6 +5,7 @@
"repo": "..",
"environment_type": "conda-delegated",
"show_commit_url": "http://github.com/scitools/iris/commit/",
"branches": ["upstream/main"],

"benchmark_dir": "./benchmarks",
"env_dir": ".asv/env",
121 changes: 121 additions & 0 deletions benchmarks/benchmarks/__init__.py
@@ -4,5 +4,126 @@
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""Common code for benchmarks."""
from functools import wraps
from os import environ
import resource

ARTIFICIAL_DIM_SIZE = int(10e3) # For all artificial cubes, coords etc.


def disable_repeat_between_setup(benchmark_object):
    """
    Decorator for benchmarks where object persistence would be inappropriate.

    E.g:
        * Benchmarking data realisation
        * Benchmarking Cube coord addition

    Can be applied to benchmark classes/methods/functions.

    https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks

    """
    # Prevent repeat runs between setup() runs - object(s) will persist after 1st.
    benchmark_object.number = 1
    # Compensate for reduced certainty by increasing number of repeats.
    # (setup() is run between each repeat).
    # Minimum 5 repeats, run up to 30 repeats / 20 secs whichever comes first.
    benchmark_object.repeat = (5, 30, 20.0)
    # ASV uses warmup to estimate benchmark time before planning the real run.
    # Prevent this, since object(s) will persist after first warmup run,
    # which would give ASV misleading info (warmups ignore ``number``).
    benchmark_object.warmup_time = 0.0

    return benchmark_object
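
# Editorial usage sketch - not part of the committed file; see
# coords.AuxCoordLazy (added later in this commit) for a real application.
# Lazy data realises on first access, so each repeat needs a fresh setup():
#
#   import dask.array as da
#
#   @disable_repeat_between_setup
#   class RealiseLazyData:
#       def setup(self):
#           # Runs between every repeat, giving each repeat fresh lazy data.
#           self.lazy = da.zeros((1000, 1000))
#
#       def time_realise(self):
#           # The first compute realises the data; a persisted result would
#           # leave later repeats with nothing to measure.
#           self.lazy.compute()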


class TrackAddedMemoryAllocation:
    """
    Context manager which measures by how much process resident memory grew,
    during execution of its enclosed code block.

    Obviously limited as to what it actually measures: relies on the current
    process not having significant unused (de-allocated) memory when the
    tested code block runs, and only reliable when the code allocates a
    significant amount of new memory.

    Example:
        with TrackAddedMemoryAllocation() as mb:
            initial_call()
            other_call()
        result = mb.addedmem_mb()

    """

    @staticmethod
    def process_resident_memory_mb():
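        # N.B. on Linux, ru_maxrss is reported in KiB, so dividing by 1024
        # yields MiB (on macOS it is reported in bytes instead).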
        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0

    def __enter__(self):
        self.mb_before = self.process_resident_memory_mb()
        return self

    def __exit__(self, *_):
        self.mb_after = self.process_resident_memory_mb()

    def addedmem_mb(self):
        """Return measured memory growth, in Mb."""
        return self.mb_after - self.mb_before

    @staticmethod
    def decorator(changed_params: list = None):
        """
        Decorates this benchmark to track growth in resident memory during execution.

        Intended for use on ASV ``track_`` benchmarks. Applies the
        :class:`TrackAddedMemoryAllocation` context manager to the benchmark
        code, sets the benchmark ``unit`` attribute to ``Mb``. Optionally
        replaces the benchmark ``params`` attribute with ``changed_params`` -
        useful to avoid testing very small memory volumes, where the results
        are vulnerable to noise.

        Parameters
        ----------
        changed_params : list
            Replace the benchmark's ``params`` attribute with this list.

        """
        if changed_params:
            # Must make a copy for re-use safety!
            _changed_params = list(changed_params)
        else:
            _changed_params = None

        def _inner_decorator(decorated_func):
            @wraps(decorated_func)
            def _inner_func(*args, **kwargs):
                assert decorated_func.__name__[:6] == "track_"
                # Run the decorated benchmark within the added memory context manager.
                with TrackAddedMemoryAllocation() as mb:
                    decorated_func(*args, **kwargs)
                return mb.addedmem_mb()

            if _changed_params:
                # Replace the params if replacement provided.
                _inner_func.params = _changed_params
            _inner_func.unit = "Mb"
            return _inner_func

        return _inner_decorator
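
# Editorial sketch - not part of the committed file. A ``track_`` benchmark
# using the decorator; the name and parameter values are hypothetical:
#
#   import numpy as np
#
#   @TrackAddedMemoryAllocation.decorator(changed_params=[1000, 2000])
#   def track_array_allocation(n):
#       # The wrapper returns the measured growth, which ASV records in Mb.
#       np.zeros((n, n), dtype=np.float64)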


def on_demand_benchmark(benchmark_object):
    """
    Decorator. Disables these benchmark(s) unless ON_DEMAND_BENCHMARKS env var is set.

    For benchmarks that, for whatever reason, should not be run by default.
    E.g:
        * Require a local file
        * Used for scalability analysis instead of commit monitoring.

    Can be applied to benchmark classes/methods/functions.

    """
    if "ON_DEMAND_BENCHMARKS" in environ:
        return benchmark_object
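    # (Implicitly returns None otherwise, so ASV does not collect the benchmark.)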
3 changes: 2 additions & 1 deletion benchmarks/benchmarks/aux_factory.py
@@ -10,9 +10,10 @@

import numpy as np

-from benchmarks import ARTIFICIAL_DIM_SIZE
from iris import aux_factory, coords

+from . import ARTIFICIAL_DIM_SIZE


class FactoryCommon:
    # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released:
20 changes: 19 additions & 1 deletion benchmarks/benchmarks/coords.py
@@ -10,9 +10,10 @@

import numpy as np

-from benchmarks import ARTIFICIAL_DIM_SIZE
from iris import coords

+from . import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup


def setup():
"""General variables needed by multiple benchmark classes."""
@@ -92,6 +93,23 @@ def setup(self):
    def create(self):
        return coords.AuxCoord(**self.create_kwargs)

+    def time_points(self):
+        _ = self.component.points

+    def time_bounds(self):
+        _ = self.component.bounds


+@disable_repeat_between_setup
+class AuxCoordLazy(AuxCoord):
+    """Lazy equivalent of :class:`AuxCoord`."""

+    def setup(self):
+        super().setup()
+        self.create_kwargs["points"] = self.component.lazy_points()
+        self.create_kwargs["bounds"] = self.component.lazy_bounds()
+        self.setup_common()


class CellMeasure(CoordCommon):
    def setup(self):