Merge pull request #633 from aai-institute/feature/drop-python-38

Drop python 3.8, use ruff, fix some deps and more
aai-institute · Jan 12, 2025 · 454b109 · 454b109
2 parents 262197f + a9c6201
commit 454b109
Show file tree

Hide file tree

Showing 130 changed files with 525 additions and 654 deletions.
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -22,21 +22,21 @@ env:
 jobs:
   code-quality:
     name: Lint code and check type hints
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
     - uses: actions/checkout@v4
-    - name: Setup Python 3.8
+    - name: Setup Python 3.9
       uses: ./.github/actions/python
       with:
-        python_version: 3.8
+        python_version: 3.9
     - uses: actions/cache@v4
       with:
         path: ~/.cache/pre-commit
         key: pre-commit-${{ env.pythonLocation }}-${{ hashFiles('.pre-commit-config.yaml') }}
     - name: Lint Code
       run: |
         pre-commit run --all --show-diff-on-failure
-        python build_scripts/run_pylint.py | (pylint-json2html -f jsonextended -o pylint.html)
+        ruff check src/ --fix
       shell: bash
     - name: Generate mypy cache key
       id: generate-mypy-cache-key
@@ -52,16 +52,16 @@ jobs:
 
   docs:
     name: Build Docs
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0
         lfs: true
-    - name: Setup Python 3.8
+    - name: Setup Python 3.9
       uses: ./.github/actions/python
       with:
-        python_version: 3.8
+        python_version: 3.9
     - name: Install Pandoc
       uses: r-lib/actions/setup-pandoc@v2
       with:
@@ -73,7 +73,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python_version: ["3.8", "3.9", "3.10", "3.11"]
+        python_version: ["3.9", "3.10", "3.11", "3.12"]
         group_number: [1, 2, 3, 4]
     name: Run Tests - Python ${{ matrix.python_version }} - Group ${{ matrix.group_number }}
     uses: ./.github/workflows/run-tests-workflow.yaml
@@ -88,7 +88,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python_version: ["3.8", "3.9", "3.10", "3.11"]
+        python_version: ["3.9", "3.10", "3.11", "3.12"]
         group_number: [1, 2, 3, 4]
     name: Run Notebook tests -  Python ${{ matrix.python_version }} - Group ${{ matrix.group_number }}
     uses: ./.github/workflows/run-notebook-tests-workflow.yaml
@@ -114,7 +114,7 @@ jobs:
 
   push-docs-and-release-testpypi:
     name: Push Docs and maybe release Package to TestPyPI
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [docs, group-tests, notebook-tests]
     if: ${{ github.ref == 'refs/heads/develop' }}
     concurrency:
@@ -124,10 +124,10 @@ jobs:
         with:
           fetch-depth: 0
           lfs: true
-      - name: Setup Python 3.8
+      - name: Setup Python 3.9
         uses: ./.github/actions/python
         with:
-          python_version: 3.8
+          python_version: 3.9
       - name: Install Pandoc
         uses: r-lib/actions/setup-pandoc@v2
         with:

diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
@@ -22,7 +22,7 @@ env:
 
 jobs:
   publish:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     concurrency:
       group: publish
     steps:
@@ -44,10 +44,10 @@ jobs:
         run: |
           echo "Running action locally. Failing"
           exit -1
-      - name: Setup Python 3.8
+      - name: Setup Python 3.9
         uses: ./.github/actions/python
         with:
-          python_version: 3.8
+          python_version: 3.9
       - name: Get Current Version
         run: |
           export CURRENT_VERSION=$(python setup.py --version --quiet | awk -F. '{print $1"."$2"."$3}')

diff --git a/.github/workflows/run-legacy-tests-workflow.yaml b/.github/workflows/run-legacy-tests-workflow.yaml
@@ -22,7 +22,7 @@ env:
 
 jobs:
   run-legacy-tests:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: Free Disk Space (Ubuntu)
         uses: jlumbroso/free-disk-space@main

diff --git a/.github/workflows/run-notebook-tests-workflow.yaml b/.github/workflows/run-notebook-tests-workflow.yaml
@@ -21,7 +21,7 @@ env:
 
 jobs:
   run-tests:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
     - uses: actions/checkout@v4
       with:

diff --git a/.github/workflows/run-tests-workflow.yaml b/.github/workflows/run-tests-workflow.yaml
@@ -22,7 +22,7 @@ env:
 
 jobs:
   run-tests:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
     - name: Free Disk Space (Ubuntu)
       uses: jlumbroso/free-disk-space@main

diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml
@@ -5,7 +5,7 @@ on:
 
 jobs:
   stale:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/stale@v9
         with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,15 +1,16 @@
 fail_fast: false
 
 repos:
-  - repo: https://github.com/psf/black
-    rev: 22.10.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.1
     hooks:
-      - id: black-jupyter
-        language_version: python3
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
+      - id: ruff
+        # HACK: ruff-pre-commit ignores pyproject.toml
+        # https://github.com/astral-sh/ruff-pre-commit/issues/54
+        args: [ "--extend-per-file-ignores", "tests/**/*.py:F811",
+                "--extend-per-file-ignores", "tests/**/*.py:F401",
+                "--fix" ]
+      - id: ruff-format
   - repo: https://github.com/kynan/nbstripout
     rev: 0.6.1
     hooks:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -48,14 +48,19 @@
 - Fix a bug in the calculation of variance estimates for MSR Banzhaf
   [PR #605](https://github.com/aai-institute/pyDVL/pull/605)
 - Fix a bug in KNN Shapley values. See [Issue 613](https://github.com/aai-institute/pyDVL/issues/613) for details.
-
+- Backport the KNN Shapley fix to the `value` module
+  [PR #633](https://github.com/aai-institute/pyDVL/pull/633) 
 
 ### Changed
 
 - Use tighter bounds for the calculation of the minimal sample size that guarantees
   an epsilon-delta approximation in group testing (Jia et al. 2023)
   [PR #602](https://github.com/aai-institute/pyDVL/pull/602)
+- Dropped black, isort and pylint from the CI pipeline, in favour of ruff
+  [PR #633](https://github.com/aai-institute/pyDVL/pull/633)
 - **Breaking Changes**
+  - Dropped support for python 3.8 after EOL
+    [PR #633](https://github.com/aai-institute/pyDVL/pull/633)
   - Rename parameter `hessian_regularization` of `DirectInfluence`
     to `regularization` and change the type annotation to allow
     for block-wise regularization parameters

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -47,7 +47,7 @@ pip install -r requirements-dev.txt -r requirements-docs.txt
 With conda:
 
 ```shell
-conda create -n pydvl python=3.8
+conda create -n pydvl python=3.9
 conda activate pydvl
 pip install -r requirements-dev.txt -r requirements-docs.txt
 ```
@@ -89,9 +89,9 @@ failing pipelines. tox will:
 * run the test suite
 * build the documentation
 * build and test installation of the package.
-* generate coverage and pylint reports in html, as well as badges.
+* generate coverage reports in html, as well as badges.
 
-You can configure pytest, coverage and pylint by adjusting
+You can configure pytest, coverage and ruff by adjusting
 [pyproject.toml](pyproject.toml).
 
 Besides the usual unit tests, most algorithms are tested using pytest. This
@@ -537,11 +537,11 @@ on the job id to be unique (but then you'll see warnings for the workflows
 without that job id).
 
 ```shell
-# Run only the main tests for python 3.8 after a push event (implicit) 
+# Run only the main tests for python 3.9 after a push event (implicit) 
 act -W .github/workflows/run-tests-workflow.yaml \
     -j run-tests \
     --input tests_to_run=base\
-    --input python_version=3.8
+    --input python_version=3.9
 ```
 
 Other common flags are: 

diff --git a/build_scripts/generate_api_docs.py b/build_scripts/generate_api_docs.py
@@ -1,4 +1,5 @@
 """Generate the code reference pages."""
+
 from pathlib import Path
 
 import mkdocs_gen_files

diff --git a/build_scripts/run_pylint.py b/build_scripts/run_pylint.py
diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md
@@ -40,7 +40,7 @@ python -c "import pydvl; print(pydvl.__version__)"
 
 ## Dependencies
 
-pyDVL requires Python >= 3.8, [numpy](https://numpy.org/),
+pyDVL requires Python >= 3.9, [numpy](https://numpy.org/),
 [scikit-learn](https://scikit-learn.org/stable/), [scipy](https://scipy.org/),
 [cvxpy](https://www.cvxpy.org/) for the core methods, and
 [joblib](https://joblib.readthedocs.io/en/stable/) for parallelization locally.

diff --git a/notebooks/data_oob.ipynb b/notebooks/data_oob.ipynb
@@ -67,8 +67,8 @@
     "import numpy as np\n",
     "import pandas as pd\n",
     "from sklearn.neighbors import KNeighborsClassifier\n",
-    "from tqdm.notebook import tqdm\n",
     "from support.common import load_adult_data\n",
+    "from tqdm.notebook import tqdm\n",
     "\n",
     "from pydvl.parallel import init_executor\n",
     "from pydvl.reporting.plots import plot_ci_array, plot_ci_values\n",
@@ -369,7 +369,7 @@
     "shade_colors = [\"lightskyblue\", \"firebrick\", \"seagreen\", \"gold\", \"plum\"]\n",
     "fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=[15, 5])\n",
     "\n",
-    "for (n_est, values, mean_color, shade_color) in zip(\n",
+    "for n_est, values, mean_color, shade_color in zip(\n",
     "    n_estimators, oob_values, mean_colors, shade_colors\n",
     "):\n",
     "    values.sort(key=\"value\")\n",

diff --git a/notebooks/influence_imagenet.ipynb b/notebooks/influence_imagenet.ipynb
@@ -76,30 +76,31 @@
    "source": [
     "%autoreload\n",
     "%matplotlib inline\n",
-    "from typing import Tuple\n",
     "import logging\n",
+    "import os\n",
+    "from typing import Tuple\n",
+    "\n",
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
-    "import os\n",
     "import pandas as pd\n",
     "import torch\n",
-    "from torch import nn\n",
-    "from torch.utils.data import DataLoader, TensorDataset\n",
     "from support.common import (\n",
-    "    plot_sample_images,\n",
-    "    plot_lowest_highest_influence_images,\n",
-    "    plot_losses,\n",
+    "    compute_mean_corrupted_influences,\n",
     "    corrupt_imagenet,\n",
     "    load_preprocess_imagenet,\n",
     "    plot_corrupted_influences_distribution,\n",
-    "    compute_mean_corrupted_influences,\n",
+    "    plot_losses,\n",
+    "    plot_lowest_highest_influence_images,\n",
+    "    plot_sample_images,\n",
     ")\n",
     "from support.torch import (\n",
-    "    TrainingManager,\n",
     "    MODEL_PATH,\n",
+    "    TrainingManager,\n",
     "    new_resnet_model,\n",
     ")\n",
     "from support.types import Losses\n",
+    "from torch import nn\n",
+    "from torch.utils.data import DataLoader, TensorDataset\n",
     "\n",
     "logging.basicConfig(level=logging.INFO)\n",
     "default_figsize = (7, 7)\n",
@@ -121,9 +122,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, f1_score\n",
+    "\n",
     "from pydvl.influence.torch import CgInfluence\n",
-    "from pydvl.reporting.plots import plot_influence_distribution_by_label\n",
-    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score"
+    "from pydvl.reporting.plots import plot_influence_distribution_by_label"
    ]
   },
   {

diff --git a/notebooks/influence_sentiment_analysis.ipynb b/notebooks/influence_sentiment_analysis.ipynb
@@ -94,10 +94,10 @@
     "from datasets import load_dataset\n",
     "from IPython.display import HTML, display\n",
     "from sklearn.metrics import f1_score\n",
+    "from support.torch import ImdbDataset, ModelLogitsWrapper\n",
     "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
     "\n",
-    "from pydvl.influence.torch import EkfacInfluence\n",
-    "from support.torch import ImdbDataset, ModelLogitsWrapper"
+    "from pydvl.influence.torch import EkfacInfluence"
    ]
   },
   {
@@ -1215,7 +1215,7 @@
     "    for idx, mean_infl in enumerate(group_df[\"mean_infl\"]):\n",
     "        if idx == 0:\n",
     "            continue\n",
-    "        reg_value_diff = f\"Reg: {group_df['reg_value'].iloc[idx-1]} -> {group_df['reg_value'].iloc[idx]}\"\n",
+    "        reg_value_diff = f\"Reg: {group_df['reg_value'].iloc[idx - 1]} -> {group_df['reg_value'].iloc[idx]}\"\n",
     "        pearson = pearsonr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n",
     "        spearman = spearmanr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n",
     "        result_corr[layer_id + \"_pearson\"].update({f\"{reg_value_diff}\": pearson})\n",

diff --git a/notebooks/influence_synthetic.ipynb b/notebooks/influence_synthetic.ipynb
@@ -96,27 +96,29 @@
     "\n",
     "import os\n",
     "import random\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "import torch\n",
     "import torch.nn.functional as F\n",
-    "import matplotlib.pyplot as plt\n",
-    "from pydvl.influence.torch import DirectInfluence, CgInfluence\n",
-    "from support.shapley import (\n",
-    "    synthetic_classification_dataset,\n",
-    "    decision_boundary_fixed_variance_2d,\n",
-    ")\n",
+    "from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix\n",
     "from support.common import (\n",
     "    plot_gaussian_blobs,\n",
-    "    plot_losses,\n",
     "    plot_influences,\n",
+    "    plot_losses,\n",
+    ")\n",
+    "from support.shapley import (\n",
+    "    decision_boundary_fixed_variance_2d,\n",
+    "    synthetic_classification_dataset,\n",
     ")\n",
     "from support.torch import (\n",
-    "    fit_torch_model,\n",
     "    TorchLogisticRegression,\n",
+    "    fit_torch_model,\n",
     ")\n",
-    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
     "from torch.optim import AdamW, lr_scheduler\n",
-    "from torch.utils.data import DataLoader, TensorDataset"
+    "from torch.utils.data import DataLoader, TensorDataset\n",
+    "\n",
+    "from pydvl.influence.torch import CgInfluence, DirectInfluence"
    ]
   },
   {

diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml
@@ -5,7 +5,7 @@ on:
 
 jobs:
   stale:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/stale@v9
         with: