Skip to content

Commit

Permalink
Merge pull request #633 from aai-institute/feature/drop-python-38
Browse files (browse the repository at this point in the history)
Drop python 3.8, use ruff, fix some deps and more
  • Loading branch information
mdbenito authored Jan 12, 2025
2 parents 262197f + a9c6201 commit 454b109
Show file tree
Hide file tree
Showing 130 changed files with 525 additions and 654 deletions.
24 changes: 12 additions & 12 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,21 @@ env:
jobs:
code-quality:
name: Lint code and check type hints
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Setup Python 3.8
- name: Setup Python 3.9
uses: ./.github/actions/python
with:
python_version: 3.8
python_version: 3.9
- uses: actions/cache@v4
with:
path: ~/.cache/pre-commit
key: pre-commit-${{ env.pythonLocation }}-${{ hashFiles('.pre-commit-config.yaml') }}
- name: Lint Code
run: |
pre-commit run --all --show-diff-on-failure
python build_scripts/run_pylint.py | (pylint-json2html -f jsonextended -o pylint.html)
ruff check src/ --fix
shell: bash
- name: Generate mypy cache key
id: generate-mypy-cache-key
Expand All @@ -52,16 +52,16 @@ jobs:

docs:
name: Build Docs
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Setup Python 3.8
- name: Setup Python 3.9
uses: ./.github/actions/python
with:
python_version: 3.8
python_version: 3.9
- name: Install Pandoc
uses: r-lib/actions/setup-pandoc@v2
with:
Expand All @@ -73,7 +73,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python_version: ["3.8", "3.9", "3.10", "3.11"]
python_version: ["3.9", "3.10", "3.11", "3.12"]
group_number: [1, 2, 3, 4]
name: Run Tests - Python ${{ matrix.python_version }} - Group ${{ matrix.group_number }}
uses: ./.github/workflows/run-tests-workflow.yaml
Expand All @@ -88,7 +88,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python_version: ["3.8", "3.9", "3.10", "3.11"]
python_version: ["3.9", "3.10", "3.11", "3.12"]
group_number: [1, 2, 3, 4]
name: Run Notebook tests - Python ${{ matrix.python_version }} - Group ${{ matrix.group_number }}
uses: ./.github/workflows/run-notebook-tests-workflow.yaml
Expand All @@ -114,7 +114,7 @@ jobs:

push-docs-and-release-testpypi:
name: Push Docs and maybe release Package to TestPyPI
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
needs: [docs, group-tests, notebook-tests]
if: ${{ github.ref == 'refs/heads/develop' }}
concurrency:
Expand All @@ -124,10 +124,10 @@ jobs:
with:
fetch-depth: 0
lfs: true
- name: Setup Python 3.8
- name: Setup Python 3.9
uses: ./.github/actions/python
with:
python_version: 3.8
python_version: 3.9
- name: Install Pandoc
uses: r-lib/actions/setup-pandoc@v2
with:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ env:

jobs:
publish:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
concurrency:
group: publish
steps:
Expand All @@ -44,10 +44,10 @@ jobs:
run: |
echo "Running action locally. Failing"
exit -1
- name: Setup Python 3.8
- name: Setup Python 3.9
uses: ./.github/actions/python
with:
python_version: 3.8
python_version: 3.9
- name: Get Current Version
run: |
export CURRENT_VERSION=$(python setup.py --version --quiet | awk -F. '{print $1"."$2"."$3}')
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run-legacy-tests-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ env:

jobs:
run-legacy-tests:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run-notebook-tests-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ env:

jobs:
run-tests:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run-tests-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ env:

jobs:
run-tests:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/stale.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:

jobs:
stale:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/stale@v9
with:
Expand Down
17 changes: 9 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
fail_fast: false

repos:
- repo: https://github.com/psf/black
rev: 22.10.0
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
hooks:
- id: black-jupyter
language_version: python3
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
- id: ruff
# HACK: ruff-pre-commit ignores pyproject.toml
# https://github.com/astral-sh/ruff-pre-commit/issues/54
args: [ "--extend-per-file-ignores", "tests/**/*.py:F811",
"--extend-per-file-ignores", "tests/**/*.py:F401",
"--fix" ]
- id: ruff-format
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
hooks:
Expand Down
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,19 @@
- Fix a bug in the calculation of variance estimates for MSR Banzhaf
[PR #605](https://github.com/aai-institute/pyDVL/pull/605)
- Fix a bug in KNN Shapley values. See [Issue 613](https://github.com/aai-institute/pyDVL/issues/613) for details.

- Backport the KNN Shapley fix to the `value` module
[PR #633](https://github.com/aai-institute/pyDVL/pull/633)

### Changed

- Use tighter bounds for the calculation of the minimal sample size that guarantees
an epsilon-delta approximation in group testing (Jia et al. 2023)
[PR #602](https://github.com/aai-institute/pyDVL/pull/602)
- Dropped black, isort and pylint from the CI pipeline, in favour of ruff
[PR #633](https://github.com/aai-institute/pyDVL/pull/633)
- **Breaking Changes**
- Dropped support for Python 3.8 after it reached end-of-life (EOL)
[PR #633](https://github.com/aai-institute/pyDVL/pull/633)
- Rename parameter `hessian_regularization` of `DirectInfluence`
to `regularization` and change the type annotation to allow
for block-wise regularization parameters
Expand Down
10 changes: 5 additions & 5 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pip install -r requirements-dev.txt -r requirements-docs.txt
With conda:

```shell
conda create -n pydvl python=3.8
conda create -n pydvl python=3.9
conda activate pydvl
pip install -r requirements-dev.txt -r requirements-docs.txt
```
Expand Down Expand Up @@ -89,9 +89,9 @@ failing pipelines. tox will:
* run the test suite
* build the documentation
* build and test installation of the package.
* generate coverage and pylint reports in html, as well as badges.
* generate coverage reports in HTML, as well as badges.

You can configure pytest, coverage and pylint by adjusting
You can configure pytest, coverage and ruff by adjusting
[pyproject.toml](pyproject.toml).

Besides the usual unit tests, most algorithms are tested using pytest. This
Expand Down Expand Up @@ -537,11 +537,11 @@ on the job id to be unique (but then you'll see warnings for the workflows
without that job id).

```shell
# Run only the main tests for python 3.8 after a push event (implicit)
# Run only the main tests for Python 3.9 after a push event (implicit)
act -W .github/workflows/run-tests-workflow.yaml \
-j run-tests \
--input tests_to_run=base\
--input python_version=3.8
--input python_version=3.9
```

Other common flags are:
Expand Down
1 change: 1 addition & 0 deletions build_scripts/generate_api_docs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Generate the code reference pages."""

from pathlib import Path

import mkdocs_gen_files
Expand Down
19 changes: 0 additions & 19 deletions build_scripts/run_pylint.py

This file was deleted.

2 changes: 1 addition & 1 deletion docs/getting-started/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ python -c "import pydvl; print(pydvl.__version__)"

## Dependencies

pyDVL requires Python >= 3.8, [numpy](https://numpy.org/),
pyDVL requires Python >= 3.9, [numpy](https://numpy.org/),
[scikit-learn](https://scikit-learn.org/stable/), [scipy](https://scipy.org/),
[cvxpy](https://www.cvxpy.org/) for the core methods, and
[joblib](https://joblib.readthedocs.io/en/stable/) for parallelization locally.
Expand Down
4 changes: 2 additions & 2 deletions notebooks/data_oob.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from tqdm.notebook import tqdm\n",
"from support.common import load_adult_data\n",
"from tqdm.notebook import tqdm\n",
"\n",
"from pydvl.parallel import init_executor\n",
"from pydvl.reporting.plots import plot_ci_array, plot_ci_values\n",
Expand Down Expand Up @@ -369,7 +369,7 @@
"shade_colors = [\"lightskyblue\", \"firebrick\", \"seagreen\", \"gold\", \"plum\"]\n",
"fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=[15, 5])\n",
"\n",
"for (n_est, values, mean_color, shade_color) in zip(\n",
"for n_est, values, mean_color, shade_color in zip(\n",
" n_estimators, oob_values, mean_colors, shade_colors\n",
"):\n",
" values.sort(key=\"value\")\n",
Expand Down
24 changes: 13 additions & 11 deletions notebooks/influence_imagenet.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -76,30 +76,31 @@
"source": [
"%autoreload\n",
"%matplotlib inline\n",
"from typing import Tuple\n",
"import logging\n",
"import os\n",
"from typing import Tuple\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import os\n",
"import pandas as pd\n",
"import torch\n",
"from torch import nn\n",
"from torch.utils.data import DataLoader, TensorDataset\n",
"from support.common import (\n",
" plot_sample_images,\n",
" plot_lowest_highest_influence_images,\n",
" plot_losses,\n",
" compute_mean_corrupted_influences,\n",
" corrupt_imagenet,\n",
" load_preprocess_imagenet,\n",
" plot_corrupted_influences_distribution,\n",
" compute_mean_corrupted_influences,\n",
" plot_losses,\n",
" plot_lowest_highest_influence_images,\n",
" plot_sample_images,\n",
")\n",
"from support.torch import (\n",
" TrainingManager,\n",
" MODEL_PATH,\n",
" TrainingManager,\n",
" new_resnet_model,\n",
")\n",
"from support.types import Losses\n",
"from torch import nn\n",
"from torch.utils.data import DataLoader, TensorDataset\n",
"\n",
"logging.basicConfig(level=logging.INFO)\n",
"default_figsize = (7, 7)\n",
Expand All @@ -121,9 +122,10 @@
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, f1_score\n",
"\n",
"from pydvl.influence.torch import CgInfluence\n",
"from pydvl.reporting.plots import plot_influence_distribution_by_label\n",
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score"
"from pydvl.reporting.plots import plot_influence_distribution_by_label"
]
},
{
Expand Down
6 changes: 3 additions & 3 deletions notebooks/influence_sentiment_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@
"from datasets import load_dataset\n",
"from IPython.display import HTML, display\n",
"from sklearn.metrics import f1_score\n",
"from support.torch import ImdbDataset, ModelLogitsWrapper\n",
"from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
"\n",
"from pydvl.influence.torch import EkfacInfluence\n",
"from support.torch import ImdbDataset, ModelLogitsWrapper"
"from pydvl.influence.torch import EkfacInfluence"
]
},
{
Expand Down Expand Up @@ -1215,7 +1215,7 @@
" for idx, mean_infl in enumerate(group_df[\"mean_infl\"]):\n",
" if idx == 0:\n",
" continue\n",
" reg_value_diff = f\"Reg: {group_df['reg_value'].iloc[idx-1]} -> {group_df['reg_value'].iloc[idx]}\"\n",
" reg_value_diff = f\"Reg: {group_df['reg_value'].iloc[idx - 1]} -> {group_df['reg_value'].iloc[idx]}\"\n",
" pearson = pearsonr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n",
" spearman = spearmanr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n",
" result_corr[layer_id + \"_pearson\"].update({f\"{reg_value_diff}\": pearson})\n",
Expand Down
22 changes: 12 additions & 10 deletions notebooks/influence_synthetic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,27 +96,29 @@
"\n",
"import os\n",
"import random\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn.functional as F\n",
"import matplotlib.pyplot as plt\n",
"from pydvl.influence.torch import DirectInfluence, CgInfluence\n",
"from support.shapley import (\n",
" synthetic_classification_dataset,\n",
" decision_boundary_fixed_variance_2d,\n",
")\n",
"from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix\n",
"from support.common import (\n",
" plot_gaussian_blobs,\n",
" plot_losses,\n",
" plot_influences,\n",
" plot_losses,\n",
")\n",
"from support.shapley import (\n",
" decision_boundary_fixed_variance_2d,\n",
" synthetic_classification_dataset,\n",
")\n",
"from support.torch import (\n",
" fit_torch_model,\n",
" TorchLogisticRegression,\n",
" fit_torch_model,\n",
")\n",
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
"from torch.optim import AdamW, lr_scheduler\n",
"from torch.utils.data import DataLoader, TensorDataset"
"from torch.utils.data import DataLoader, TensorDataset\n",
"\n",
"from pydvl.influence.torch import CgInfluence, DirectInfluence"
]
},
{
Expand Down
Loading

0 comments on commit 454b109

Please sign in to comment.