From 364ce67a3e93c39af49c93fb2aef25893829fc12 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Mon, 23 Dec 2024 16:11:16 +0100 Subject: [PATCH 01/21] Drop python 3.8 in workflows --- .github/workflows/main.yaml | 16 ++++++++-------- .github/workflows/publish.yaml | 4 ++-- CONTRIBUTING.md | 6 +++--- docs/getting-started/index.md | 2 +- pyproject.toml | 2 +- setup.py | 2 +- tox.ini | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index c7278a372..becca5a74 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -25,10 +25,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Setup Python 3.8 + - name: Setup Python 3.9 uses: ./.github/actions/python with: - python_version: 3.8 + python_version: 3.9 - uses: actions/cache@v4 with: path: ~/.cache/pre-commit @@ -58,10 +58,10 @@ jobs: with: fetch-depth: 0 lfs: true - - name: Setup Python 3.8 + - name: Setup Python 3.9 uses: ./.github/actions/python with: - python_version: 3.8 + python_version: 3.9 - name: Install Pandoc uses: r-lib/actions/setup-pandoc@v2 with: @@ -73,7 +73,7 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.8", "3.9", "3.10", "3.11"] + python_version: ["3.9", "3.10", "3.11", "3.12"] group_number: [1, 2, 3, 4] name: Run Tests - Python ${{ matrix.python_version }} - Group ${{ matrix.group_number }} uses: ./.github/workflows/run-tests-workflow.yaml @@ -88,7 +88,7 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.8", "3.9", "3.10", "3.11"] + python_version: ["3.9", "3.10", "3.11", "3.12"] group_number: [1, 2, 3, 4] name: Run Notebook tests - Python ${{ matrix.python_version }} - Group ${{ matrix.group_number }} uses: ./.github/workflows/run-notebook-tests-workflow.yaml @@ -124,10 +124,10 @@ jobs: with: fetch-depth: 0 lfs: true - - name: Setup Python 3.8 + - name: Setup Python 3.9 uses: ./.github/actions/python with: - python_version: 3.8 + python_version: 3.9 - name: Install Pandoc uses: r-lib/actions/setup-pandoc@v2 with: diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 70beebd20..0c90d7aee 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -44,10 +44,10 @@ jobs: run: | echo "Running action locally. Failing" exit -1 - - name: Setup Python 3.8 + - name: Setup Python 3.9 uses: ./.github/actions/python with: - python_version: 3.8 + python_version: 3.9 - name: Get Current Version run: | export CURRENT_VERSION=$(python setup.py --version --quiet | awk -F. '{print $1"."$2"."$3}') diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ecd1288de..ee4e1166d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,7 +47,7 @@ pip install -r requirements-dev.txt -r requirements-docs.txt With conda: ```shell -conda create -n pydvl python=3.8 +conda create -n pydvl python=3.9 conda activate pydvl pip install -r requirements-dev.txt -r requirements-docs.txt ``` @@ -537,11 +537,11 @@ on the job id to be unique (but then you'll see warnings for the workflows without that job id). ```shell -# Run only the main tests for python 3.8 after a push event (implicit) +# Run only the main tests for python 3.9 after a push event (implicit) act -W .github/workflows/run-tests-workflow.yaml \ -j run-tests \ --input tests_to_run=base\ - --input python_version=3.8 + --input python_version=3.9 ``` Other common flags are: diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md index da4b2c1eb..a74147b5d 100644 --- a/docs/getting-started/index.md +++ b/docs/getting-started/index.md @@ -40,7 +40,7 @@ python -c "import pydvl; print(pydvl.__version__)" ## Dependencies -pyDVL requires Python >= 3.8, [numpy](https://numpy.org/), +pyDVL requires Python >= 3.9, [numpy](https://numpy.org/), [scikit-learn](https://scikit-learn.org/stable/), [scipy](https://scipy.org/), [cvxpy](https://www.cvxpy.org/) for the core methods, and [joblib](https://joblib.readthedocs.io/en/stable/) for parallelization locally. diff --git a/pyproject.toml b/pyproject.toml index dac623ef8..eab0270b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ load-plugins = ["pylint_json2html"] output-format = "jsonextended" [tool.mypy] -python_version = "3.8" +python_version = "3.9" mypy_path = './src/' ignore_missing_imports = true warn_return_any = true diff --git a/setup.py b/setup.py index 300ceed6a..270d16cdf 100644 --- a/setup.py +++ b/setup.py @@ -40,10 +40,10 @@ "Development Status :: 4 - Beta", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Typing :: Typed", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", diff --git a/tox.ini b/tox.ini index 206a347a0..d27d9a949 100644 --- a/tox.ini +++ b/tox.ini @@ -46,7 +46,7 @@ whitelist_externals = bash [testenv:type-checking] -basepython = python3.8 +basepython = python3.9 skip_install = true setenv = MYPY_FORCE_COLOR=1 From ccd699102692635a5fee744b92fd396dd752f141 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 11:24:55 +0100 Subject: [PATCH 02/21] Pin ubuntu version in gh runners --- .github/workflows/main.yaml | 6 +++--- .github/workflows/publish.yaml | 2 +- .github/workflows/run-legacy-tests-workflow.yaml | 2 +- .github/workflows/run-notebook-tests-workflow.yaml | 2 +- .github/workflows/run-tests-workflow.yaml | 2 +- .github/workflows/stale.yaml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index becca5a74..bed9fa95b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,7 +22,7 @@ env: jobs: code-quality: name: Lint code and check type hints - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - name: Setup Python 3.9 @@ -52,7 +52,7 @@ jobs: docs: name: Build Docs - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 with: @@ -114,7 +114,7 @@ jobs: push-docs-and-release-testpypi: name: Push Docs and maybe release Package to TestPyPI - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 needs: [docs, group-tests, notebook-tests] if: ${{ github.ref == 'refs/heads/develop' }} concurrency: diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 0c90d7aee..6f28db647 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -22,7 +22,7 @@ env: jobs: publish: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 concurrency: group: publish steps: diff --git a/.github/workflows/run-legacy-tests-workflow.yaml b/.github/workflows/run-legacy-tests-workflow.yaml index cf8243be5..f168f687a 100644 --- a/.github/workflows/run-legacy-tests-workflow.yaml +++ b/.github/workflows/run-legacy-tests-workflow.yaml @@ -22,7 +22,7 @@ env: jobs: run-legacy-tests: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main diff --git a/.github/workflows/run-notebook-tests-workflow.yaml b/.github/workflows/run-notebook-tests-workflow.yaml index d0dfd31ea..dc4c723d3 100644 --- a/.github/workflows/run-notebook-tests-workflow.yaml +++ b/.github/workflows/run-notebook-tests-workflow.yaml @@ -21,7 +21,7 @@ env: jobs: run-tests: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/run-tests-workflow.yaml b/.github/workflows/run-tests-workflow.yaml index 1ddc8b522..73677654a 100644 --- a/.github/workflows/run-tests-workflow.yaml +++ b/.github/workflows/run-tests-workflow.yaml @@ -22,7 +22,7 @@ env: jobs: run-tests: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index ade8940b2..5e5d025c1 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -5,7 +5,7 @@ on: jobs: stale: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/stale@v9 with: From 398242dcb2080d4fb47cb153fd47f3cb64ca4d7f Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 11:25:03 +0100 Subject: [PATCH 03/21] More type fixes --- src/pydvl/parallel/futures/ray.py | 2 +- src/pydvl/reporting/plots.py | 30 +++++++++++-------- .../methods/_solve_least_core_problems.py | 5 ++-- src/pydvl/valuation/methods/gt_shapley.py | 7 +++-- src/pydvl/value/least_core/common.py | 4 +-- src/pydvl/value/shapley/gt.py | 5 ++-- 6 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/pydvl/parallel/futures/ray.py b/src/pydvl/parallel/futures/ray.py index 17d302a87..002d71c04 100644 --- a/src/pydvl/parallel/futures/ray.py +++ b/src/pydvl/parallel/futures/ray.py @@ -97,7 +97,7 @@ def __init__( # Work Item Manager Thread self._work_item_manager_thread: Optional[_WorkItemManagerThread] = None - def submit(self, fn: Callable[..., T], *args, **kwargs) -> "Future[T]": + def submit(self, fn: Callable[..., T], /, *args, **kwargs) -> Future[T]: r"""Submits a callable to be executed with the given arguments. Schedules the callable to be executed as fn(\*args, \**kwargs) diff --git a/src/pydvl/reporting/plots.py b/src/pydvl/reporting/plots.py index 147ae1d7a..2869b2f30 100644 --- a/src/pydvl/reporting/plots.py +++ b/src/pydvl/reporting/plots.py @@ -62,9 +62,9 @@ def shaded_mean_std( ax.fill_between(abscissa, mean - std, mean + std, alpha=0.3, color=shade_color) ax.plot(abscissa, mean, color=mean_color, **kwargs) - ax.set_title(title) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) + ax.set_title(title or "") + ax.set_xlabel(xlabel or "") + ax.set_ylabel(ylabel or "") return ax @@ -110,9 +110,11 @@ def plot_ci_array( variances=variances, counts=np.ones_like(means, dtype=np.int_) * m, indices=np.arange(n), - data_names=np.array(abscissa, dtype=str) - if abscissa is not None - else np.arange(n, dtype=str), + data_names=( + np.array(abscissa, dtype=str) + if abscissa is not None + else np.arange(n, dtype=str) + ), ) return plot_ci_values( @@ -135,7 +137,7 @@ def plot_ci_values( shade_color: Optional[str] = "lightblue", ax: Optional[plt.Axes] = None, **kwargs, -): +) -> plt.Axes: """Plot values and a confidence interval. Uses `values.data_names` for the x-axis. @@ -163,9 +165,11 @@ def plot_ci_values( ppfs = { "normal": norm.ppf, "t": partial(t.ppf, df=values.counts - 1), - "auto": norm.ppf - if np.min(values.counts) > 30 - else partial(t.ppf, df=values.counts - 1), + "auto": ( + norm.ppf + if np.min(values.counts) > 30 + else partial(t.ppf, df=values.counts - 1) + ), } try: @@ -264,9 +268,9 @@ def plot_shapley( yerr = norm.ppf(1 - level / 2) * df[f"{prefix}_stderr"] ax.errorbar(x=df.index, y=df[prefix], yerr=yerr, fmt="o", capsize=6) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - ax.set_title(title) + ax.set_xlabel(xlabel or "") + ax.set_ylabel(ylabel or "") + ax.set_title(title or "") plt.xticks(rotation=60) return ax diff --git a/src/pydvl/valuation/methods/_solve_least_core_problems.py b/src/pydvl/valuation/methods/_solve_least_core_problems.py index e5ea92399..555f3f5a4 100644 --- a/src/pydvl/valuation/methods/_solve_least_core_problems.py +++ b/src/pydvl/valuation/methods/_solve_least_core_problems.py @@ -2,8 +2,7 @@ import logging import warnings -from functools import partial -from typing import List, NamedTuple, Sequence, Tuple +from typing import NamedTuple, Tuple, cast import cvxpy as cp import numpy as np @@ -234,7 +233,7 @@ def _solve_least_core_linear_program( "maximum number of iterations in solver_options", RuntimeWarning, ) - subsidy = e.value.item() + subsidy = cast(NDArray[np.float_], e.value).item() return x.value, subsidy if problem.status in cp.settings.INF_OR_UNB: diff --git a/src/pydvl/valuation/methods/gt_shapley.py b/src/pydvl/valuation/methods/gt_shapley.py index 22baf508e..65fee7b3d 100644 --- a/src/pydvl/valuation/methods/gt_shapley.py +++ b/src/pydvl/valuation/methods/gt_shapley.py @@ -24,6 +24,7 @@ https://arxiv.org/pdf/2302.11431). """ + from __future__ import annotations import logging @@ -347,11 +348,13 @@ def solve_group_testing_problem( if cp_problem.status != "optimal": log.warning(f"cvxpy returned status {cp_problem.status}") values = ( - np.nan * np.ones_like(n_obs) if not hasattr(v.value, "__len__") else v.value + np.nan * np.ones_like(n_obs) + if not hasattr(v.value, "__len__") + else cast(NDArray[np.float_], v.value) ) status = Status.Failed else: - values = v.value + values = cast(NDArray[np.float_], v.value) status = Status.Converged result = ValuationResult( diff --git a/src/pydvl/value/least_core/common.py b/src/pydvl/value/least_core/common.py index 0e0ceb553..d7842ed7e 100644 --- a/src/pydvl/value/least_core/common.py +++ b/src/pydvl/value/least_core/common.py @@ -1,7 +1,7 @@ import itertools import logging import warnings -from typing import List, NamedTuple, Optional, Sequence, Tuple +from typing import List, NamedTuple, Optional, Sequence, Tuple, cast import cvxpy as cp import numpy as np @@ -286,7 +286,7 @@ def _solve_least_core_linear_program( "maximum number of iterations in solver_options", RuntimeWarning, ) - subsidy = e.value.item() + subsidy = cast(NDArray[np.float_], e.value).item() return x.value, subsidy if problem.status in cp.settings.INF_OR_UNB: diff --git a/src/pydvl/value/shapley/gt.py b/src/pydvl/value/shapley/gt.py index 17d83e69b..81286a7a4 100644 --- a/src/pydvl/value/shapley/gt.py +++ b/src/pydvl/value/shapley/gt.py @@ -22,6 +22,7 @@ In: Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics, pp. 1167–1176. PMLR. """ + import logging from collections import namedtuple from typing import Iterable, Optional, Tuple, TypeVar, Union, cast @@ -309,11 +310,11 @@ def reducer( values = ( np.nan * np.ones_like(u.data.indices) if not hasattr(v.value, "__len__") - else v.value + else cast(NDArray[np.float_], v.value) ) status = Status.Failed else: - values = v.value + values = cast(NDArray[np.float_], v.value) status = Status.Converged return ValuationResult( From df06949fcdb779dcd9cbf44383b54e1e88080d29 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 12:18:34 +0100 Subject: [PATCH 04/21] Drop pylint, use ruff, fix a bunch of warnings --- .pre-commit-config.yaml | 13 ++--- CONTRIBUTING.md | 4 +- build_scripts/generate_api_docs.py | 1 + build_scripts/run_pylint.py | 19 ------- notebooks/data_oob.ipynb | 4 +- notebooks/influence_imagenet.ipynb | 24 +++++---- notebooks/influence_sentiment_analysis.ipynb | 6 +-- notebooks/influence_synthetic.ipynb | 22 ++++---- notebooks/influence_wine.ipynb | 54 +++++++++---------- notebooks/least_core_basic.ipynb | 6 +-- notebooks/least_core_basic_new.ipynb | 24 ++++----- notebooks/msr_banzhaf_digits.ipynb | 7 ++- notebooks/shapley_basic_spotify.ipynb | 5 +- notebooks/shapley_knn_flowers.ipynb | 3 +- notebooks/shapley_utility_learning.ipynb | 8 +-- notebooks/support/common.py | 4 +- notebooks/support/torch.py | 2 +- public/index.html | 3 +- pyproject.toml | 31 ++++++----- requirements-linting.txt | 6 +-- src/pydvl/__init__.py | 1 + src/pydvl/influence/__init__.py | 5 +- src/pydvl/influence/array.py | 1 + .../base_influence_function_model.py | 9 ++-- src/pydvl/influence/influence_calculator.py | 4 +- src/pydvl/influence/torch/__init__.py | 6 +-- src/pydvl/influence/torch/base.py | 5 -- src/pydvl/influence/torch/batch_operation.py | 4 +- src/pydvl/influence/torch/functional.py | 2 +- .../torch/influence_function_model.py | 2 - src/pydvl/influence/torch/operator.py | 19 ++----- src/pydvl/influence/torch/preconditioner.py | 2 +- src/pydvl/influence/torch/util.py | 1 + src/pydvl/influence/types.py | 1 - src/pydvl/parallel/__init__.py | 5 +- src/pydvl/parallel/backend.py | 15 ++---- src/pydvl/parallel/map_reduce.py | 1 + src/pydvl/reporting/scores.py | 2 +- src/pydvl/utils/caching/__init__.py | 1 + src/pydvl/utils/caching/memcached.py | 3 +- src/pydvl/utils/numeric.py | 7 ++- src/pydvl/utils/progress.py | 2 +- src/pydvl/utils/score.py | 6 +-- src/pydvl/utils/types.py | 13 ++--- src/pydvl/utils/utility.py | 2 +- src/pydvl/valuation/base.py | 3 +- src/pydvl/valuation/games.py | 5 +- .../methods/_solve_least_core_problems.py | 2 - src/pydvl/valuation/methods/data_banzhaf.py | 1 + src/pydvl/valuation/methods/gt_shapley.py | 2 +- src/pydvl/valuation/methods/knn_shapley.py | 1 + src/pydvl/valuation/methods/least_core.py | 20 +++---- src/pydvl/valuation/methods/loo.py | 3 +- src/pydvl/valuation/methods/msr_banzhaf.py | 1 + src/pydvl/valuation/methods/owen_shapley.py | 1 - src/pydvl/valuation/methods/twodshapley.py | 1 + src/pydvl/valuation/result.py | 21 +++----- src/pydvl/valuation/samplers/__init__.py | 1 + src/pydvl/valuation/samplers/base.py | 1 - src/pydvl/valuation/samplers/classwise.py | 2 +- src/pydvl/valuation/samplers/powerset.py | 8 ++- src/pydvl/valuation/scorers/base.py | 3 +- src/pydvl/valuation/scorers/classwise.py | 2 +- src/pydvl/valuation/scorers/supervised.py | 3 +- src/pydvl/valuation/scorers/utils.py | 3 +- src/pydvl/valuation/stopping.py | 5 +- src/pydvl/valuation/types.py | 3 +- src/pydvl/valuation/utility/modelutility.py | 2 +- src/pydvl/valuation/utils.py | 16 ++---- src/pydvl/value/__init__.py | 18 +++---- src/pydvl/value/games.py | 4 +- src/pydvl/value/least_core/__init__.py | 2 +- src/pydvl/value/least_core/common.py | 30 +++++------ src/pydvl/value/oob/oob.py | 1 + src/pydvl/value/result.py | 20 +++---- src/pydvl/value/sampler.py | 9 ++-- src/pydvl/value/semivalues.py | 5 +- src/pydvl/value/shapley/classwise.py | 7 +-- src/pydvl/value/shapley/gt.py | 2 +- src/pydvl/value/shapley/montecarlo.py | 3 +- src/pydvl/value/shapley/owen.py | 6 +-- src/pydvl/value/shapley/truncated.py | 6 +-- src/pydvl/value/stopping.py | 3 +- tests/conftest.py | 8 ++- tests/influence/test_influence_calculator.py | 1 - tests/influence/torch/test_batch_operation.py | 2 +- tests/influence/torch/test_functional.py | 18 +++---- tests/influence/torch/test_influence_model.py | 2 +- tests/influence/torch/test_operator.py | 2 +- tests/influence/torch/test_util.py | 2 +- tests/parallel/test_parallel.py | 2 +- tests/test_results.py | 2 +- tests/utils/test_dataset.py | 6 +-- tests/utils/test_numeric.py | 8 ++- tests/utils/test_utility.py | 6 +-- .../test_deterministic_shapley_valuations.py | 1 + .../test_montecarlo_shapley_valuations.py | 6 ++- tests/valuation/methods/test_semivalues.py | 2 - .../methods/test_solve_least_core_problems.py | 1 - tests/valuation/samplers/__init__.py | 6 +-- tests/valuation/test_interface.py | 4 +- tests/valuation/test_result.py | 2 +- tests/valuation/utils.py | 5 +- tests/value/conftest.py | 1 - tests/value/shapley/test_classwise.py | 9 ++-- tests/value/shapley/test_montecarlo.py | 10 ++-- tests/value/shapley/test_truncated.py | 3 +- tests/value/utils.py | 5 +- tox.ini | 4 +- 109 files changed, 311 insertions(+), 398 deletions(-) delete mode 100644 build_scripts/run_pylint.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8386c63fc..52cb7ad7c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,12 @@ fail_fast: false repos: - - repo: https://github.com/psf/black - rev: 22.10.0 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.1 hooks: - - id: black-jupyter - language_version: python3 - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort + - id: ruff + args: [ --fix ] + - id: ruff-format - repo: https://github.com/kynan/nbstripout rev: 0.6.1 hooks: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ee4e1166d..59d50e1b5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -89,9 +89,9 @@ failing pipelines. tox will: * run the test suite * build the documentation * build and test installation of the package. -* generate coverage and pylint reports in html, as well as badges. +* generate coverage reports in html, as well as badges. -You can configure pytest, coverage and pylint by adjusting +You can configure pytest, coverage and ruff by adjusting [pyproject.toml](pyproject.toml). Besides the usual unit tests, most algorithms are tested using pytest. This diff --git a/build_scripts/generate_api_docs.py b/build_scripts/generate_api_docs.py index 99751aaa8..4193ff27b 100644 --- a/build_scripts/generate_api_docs.py +++ b/build_scripts/generate_api_docs.py @@ -1,4 +1,5 @@ """Generate the code reference pages.""" + from pathlib import Path import mkdocs_gen_files diff --git a/build_scripts/run_pylint.py b/build_scripts/run_pylint.py deleted file mode 100644 index bd18c13a3..000000000 --- a/build_scripts/run_pylint.py +++ /dev/null @@ -1,19 +0,0 @@ -import sys - -import anybadge -from pylint.lint import Run - -FAIL_THRESHOLD = 6 - -badge_thresholds = {7: "orange", 8: "yellow", 9: "green"} - -results = Run(["src", "--max-line-length=120"], exit=False) - -score = round(results.linter.stats.global_note, 2) -# NOTE: we need to do this ourselves instead of using the --fail-under flag, since we want the badge -# to be produced if we are above the threshold and therefore have to use exit=False -if score < FAIL_THRESHOLD: - sys.exit(f"Pylint failed: score is below threshold {FAIL_THRESHOLD}") - -badge = anybadge.Badge("pylint", score, thresholds=badge_thresholds) -badge.write_badge("badges/pylint.svg", overwrite=True) diff --git a/notebooks/data_oob.ipynb b/notebooks/data_oob.ipynb index 5959a0c79..1ffd1bd1d 100644 --- a/notebooks/data_oob.ipynb +++ b/notebooks/data_oob.ipynb @@ -67,8 +67,8 @@ "import numpy as np\n", "import pandas as pd\n", "from sklearn.neighbors import KNeighborsClassifier\n", - "from tqdm.notebook import tqdm\n", "from support.common import load_adult_data\n", + "from tqdm.notebook import tqdm\n", "\n", "from pydvl.parallel import init_executor\n", "from pydvl.reporting.plots import plot_ci_array, plot_ci_values\n", @@ -369,7 +369,7 @@ "shade_colors = [\"lightskyblue\", \"firebrick\", \"seagreen\", \"gold\", \"plum\"]\n", "fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=[15, 5])\n", "\n", - "for (n_est, values, mean_color, shade_color) in zip(\n", + "for n_est, values, mean_color, shade_color in zip(\n", " n_estimators, oob_values, mean_colors, shade_colors\n", "):\n", " values.sort(key=\"value\")\n", diff --git a/notebooks/influence_imagenet.ipynb b/notebooks/influence_imagenet.ipynb index fc33dbc64..d45820de7 100644 --- a/notebooks/influence_imagenet.ipynb +++ b/notebooks/influence_imagenet.ipynb @@ -76,30 +76,31 @@ "source": [ "%autoreload\n", "%matplotlib inline\n", - "from typing import Tuple\n", "import logging\n", + "import os\n", + "from typing import Tuple\n", + "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import os\n", "import pandas as pd\n", "import torch\n", - "from torch import nn\n", - "from torch.utils.data import DataLoader, TensorDataset\n", "from support.common import (\n", - " plot_sample_images,\n", - " plot_lowest_highest_influence_images,\n", - " plot_losses,\n", + " compute_mean_corrupted_influences,\n", " corrupt_imagenet,\n", " load_preprocess_imagenet,\n", " plot_corrupted_influences_distribution,\n", - " compute_mean_corrupted_influences,\n", + " plot_losses,\n", + " plot_lowest_highest_influence_images,\n", + " plot_sample_images,\n", ")\n", "from support.torch import (\n", - " TrainingManager,\n", " MODEL_PATH,\n", + " TrainingManager,\n", " new_resnet_model,\n", ")\n", "from support.types import Losses\n", + "from torch import nn\n", + "from torch.utils.data import DataLoader, TensorDataset\n", "\n", "logging.basicConfig(level=logging.INFO)\n", "default_figsize = (7, 7)\n", @@ -121,9 +122,10 @@ "metadata": {}, "outputs": [], "source": [ + "from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, f1_score\n", + "\n", "from pydvl.influence.torch import CgInfluence\n", - "from pydvl.reporting.plots import plot_influence_distribution_by_label\n", - "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score" + "from pydvl.reporting.plots import plot_influence_distribution_by_label" ] }, { diff --git a/notebooks/influence_sentiment_analysis.ipynb b/notebooks/influence_sentiment_analysis.ipynb index c66d51a5e..ec54a117a 100644 --- a/notebooks/influence_sentiment_analysis.ipynb +++ b/notebooks/influence_sentiment_analysis.ipynb @@ -94,10 +94,10 @@ "from datasets import load_dataset\n", "from IPython.display import HTML, display\n", "from sklearn.metrics import f1_score\n", + "from support.torch import ImdbDataset, ModelLogitsWrapper\n", "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", "\n", - "from pydvl.influence.torch import EkfacInfluence\n", - "from support.torch import ImdbDataset, ModelLogitsWrapper" + "from pydvl.influence.torch import EkfacInfluence" ] }, { @@ -1215,7 +1215,7 @@ " for idx, mean_infl in enumerate(group_df[\"mean_infl\"]):\n", " if idx == 0:\n", " continue\n", - " reg_value_diff = f\"Reg: {group_df['reg_value'].iloc[idx-1]} -> {group_df['reg_value'].iloc[idx]}\"\n", + " reg_value_diff = f\"Reg: {group_df['reg_value'].iloc[idx - 1]} -> {group_df['reg_value'].iloc[idx]}\"\n", " pearson = pearsonr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n", " spearman = spearmanr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n", " result_corr[layer_id + \"_pearson\"].update({f\"{reg_value_diff}\": pearson})\n", diff --git a/notebooks/influence_synthetic.ipynb b/notebooks/influence_synthetic.ipynb index 25b7ada53..cc47cda6b 100644 --- a/notebooks/influence_synthetic.ipynb +++ b/notebooks/influence_synthetic.ipynb @@ -96,27 +96,29 @@ "\n", "import os\n", "import random\n", + "\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import torch\n", "import torch.nn.functional as F\n", - "import matplotlib.pyplot as plt\n", - "from pydvl.influence.torch import DirectInfluence, CgInfluence\n", - "from support.shapley import (\n", - " synthetic_classification_dataset,\n", - " decision_boundary_fixed_variance_2d,\n", - ")\n", + "from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix\n", "from support.common import (\n", " plot_gaussian_blobs,\n", - " plot_losses,\n", " plot_influences,\n", + " plot_losses,\n", + ")\n", + "from support.shapley import (\n", + " decision_boundary_fixed_variance_2d,\n", + " synthetic_classification_dataset,\n", ")\n", "from support.torch import (\n", - " fit_torch_model,\n", " TorchLogisticRegression,\n", + " fit_torch_model,\n", ")\n", - "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", "from torch.optim import AdamW, lr_scheduler\n", - "from torch.utils.data import DataLoader, TensorDataset" + "from torch.utils.data import DataLoader, TensorDataset\n", + "\n", + "from pydvl.influence.torch import CgInfluence, DirectInfluence" ] }, { diff --git a/notebooks/influence_wine.ipynb b/notebooks/influence_wine.ipynb index 7ec902438..6313897ca 100644 --- a/notebooks/influence_wine.ipynb +++ b/notebooks/influence_wine.ipynb @@ -81,22 +81,22 @@ "import numpy as np\n", "import torch\n", "import torch.nn.functional as F\n", + "from scipy.stats import pearsonr, spearmanr\n", + "from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, f1_score\n", "from support.common import plot_losses\n", + "from support.shapley import load_wine_dataset\n", "from support.torch import TorchMLP, fit_torch_model\n", + "from torch.optim import Adam, lr_scheduler\n", + "from torch.utils.data import DataLoader, TensorDataset\n", + "\n", "from pydvl.influence.torch import (\n", - " DirectInfluence,\n", - " CgInfluence,\n", " ArnoldiInfluence,\n", + " CgInfluence,\n", + " DirectInfluence,\n", " EkfacInfluence,\n", - " NystroemSketchInfluence,\n", " LissaInfluence,\n", - ")\n", - "from pydvl.influence import InfluenceMode\n", - "from support.shapley import load_wine_dataset\n", - "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score\n", - "from torch.optim import Adam, lr_scheduler\n", - "from torch.utils.data import DataLoader, TensorDataset\n", - "from scipy.stats import pearsonr, spearmanr" + " NystroemSketchInfluence,\n", + ")" ] }, { @@ -798,7 +798,7 @@ ], "source": [ "print(\n", - " f\"Percentage error of Cg over direct method:{np.mean(np.abs(mean_cg_train_influences - mean_train_influences)/np.abs(mean_train_influences))*100} %\"\n", + " f\"Percentage error of Cg over direct method:{np.mean(np.abs(mean_cg_train_influences - mean_train_influences) / np.abs(mean_train_influences)) * 100} %\"\n", ")" ] }, @@ -869,11 +869,11 @@ ], "source": [ "print(\n", - " f\"Pearson Correlation Cg vs direct\",\n", + " \"Pearson Correlation Cg vs direct\",\n", " pearsonr(mean_cg_train_influences, mean_train_influences).statistic,\n", ")\n", "print(\n", - " f\"Spearman Correlation Cg vs direct\",\n", + " \"Spearman Correlation Cg vs direct\",\n", " spearmanr(mean_cg_train_influences, mean_train_influences).statistic,\n", ")" ] @@ -962,7 +962,7 @@ ], "source": [ "print(\n", - " f\"Percentage error of Lissa over direct method:{np.mean(np.abs(mean_lissa_train_influences - mean_train_influences)/np.abs(mean_train_influences))*100} %\"\n", + " f\"Percentage error of Lissa over direct method:{np.mean(np.abs(mean_lissa_train_influences - mean_train_influences) / np.abs(mean_train_influences)) * 100} %\"\n", ")" ] }, @@ -1033,11 +1033,11 @@ ], "source": [ "print(\n", - " f\"Pearson Correlation Lissa vs direct\",\n", + " \"Pearson Correlation Lissa vs direct\",\n", " pearsonr(mean_lissa_train_influences, mean_train_influences).statistic,\n", ")\n", "print(\n", - " f\"Spearman Correlation Lissa vs direct\",\n", + " \"Spearman Correlation Lissa vs direct\",\n", " spearmanr(mean_lissa_train_influences, mean_train_influences).statistic,\n", ")" ] @@ -1114,7 +1114,7 @@ ], "source": [ "print(\n", - " f\"Percentage error of Arnoldi over direct method:{np.mean(np.abs(mean_arnoldi_train_influences - mean_train_influences)/np.abs(mean_train_influences))*100} %\"\n", + " f\"Percentage error of Arnoldi over direct method:{np.mean(np.abs(mean_arnoldi_train_influences - mean_train_influences) / np.abs(mean_train_influences)) * 100} %\"\n", ")" ] }, @@ -1185,11 +1185,11 @@ ], "source": [ "print(\n", - " f\"Pearson Correlation Arnoldi vs direct\",\n", + " \"Pearson Correlation Arnoldi vs direct\",\n", " pearsonr(mean_arnoldi_train_influences, mean_train_influences).statistic,\n", ")\n", "print(\n", - " f\"Spearman Correlation Arnoldi vs direct\",\n", + " \"Spearman Correlation Arnoldi vs direct\",\n", " spearmanr(mean_arnoldi_train_influences, mean_train_influences).statistic,\n", ")" ] @@ -1277,7 +1277,7 @@ ], "source": [ "print(\n", - " f\"Percentage error of Nyström over direct method:{np.mean(np.abs(mean_nystroem_train_influences - mean_train_influences)/np.abs(mean_train_influences))*100} %\"\n", + " f\"Percentage error of Nyström over direct method:{np.mean(np.abs(mean_nystroem_train_influences - mean_train_influences) / np.abs(mean_train_influences)) * 100} %\"\n", ")" ] }, @@ -1348,11 +1348,11 @@ ], "source": [ "print(\n", - " f\"Pearson Correlation Nyström vs direct\",\n", + " \"Pearson Correlation Nyström vs direct\",\n", " pearsonr(mean_nystroem_train_influences, mean_train_influences).statistic,\n", ")\n", "print(\n", - " f\"Spearman Correlation Nyström vs direct\",\n", + " \"Spearman Correlation Nyström vs direct\",\n", " spearmanr(mean_nystroem_train_influences, mean_train_influences).statistic,\n", ")" ] @@ -1428,7 +1428,7 @@ ], "source": [ "print(\n", - " f\"Percentage error of EK-FAC over direct method:{np.mean(np.abs(mean_ekfac_train_influences - mean_train_influences)/np.abs(mean_train_influences))*100} %\"\n", + " f\"Percentage error of EK-FAC over direct method:{np.mean(np.abs(mean_ekfac_train_influences - mean_train_influences) / np.abs(mean_train_influences)) * 100} %\"\n", ")" ] }, @@ -1515,11 +1515,11 @@ ], "source": [ "print(\n", - " f\"Pearson Correlation EK-FAC vs direct\",\n", + " \"Pearson Correlation EK-FAC vs direct\",\n", " pearsonr(mean_ekfac_train_influences, mean_train_influences).statistic,\n", ")\n", "print(\n", - " f\"Spearman Correlation EK-FAC vs direct\",\n", + " \"Spearman Correlation EK-FAC vs direct\",\n", " spearmanr(mean_ekfac_train_influences, mean_train_influences).statistic,\n", ")" ] @@ -1558,14 +1558,14 @@ "source": [ "highest_inlfuence_idxs = np.argsort(np.abs(mean_train_influences))[-20:]\n", "print(\n", - " f\"Pearson Correlation EK-FAC vs direct - top-20 influences\",\n", + " \"Pearson Correlation EK-FAC vs direct - top-20 influences\",\n", " pearsonr(\n", " mean_ekfac_train_influences[highest_inlfuence_idxs],\n", " mean_train_influences[highest_inlfuence_idxs],\n", " ).statistic,\n", ")\n", "print(\n", - " f\"Spearman Correlation EK-FAC vs direct - top-20 influences\",\n", + " \"Spearman Correlation EK-FAC vs direct - top-20 influences\",\n", " spearmanr(\n", " mean_ekfac_train_influences[highest_inlfuence_idxs],\n", " mean_train_influences[highest_inlfuence_idxs],\n", diff --git a/notebooks/least_core_basic.ipynb b/notebooks/least_core_basic.ipynb index 5967cfe0f..da400d780 100644 --- a/notebooks/least_core_basic.ipynb +++ b/notebooks/least_core_basic.ipynb @@ -138,13 +138,13 @@ "outputs": [], "source": [ "%autoreload\n", + "from pydvl.reporting.plots import shaded_mean_std\n", + "from pydvl.reporting.scores import compute_removal_score\n", "from pydvl.utils import (\n", " Dataset,\n", " Utility,\n", ")\n", - "from pydvl.value import compute_least_core_values, LeastCoreMode, ValuationResult\n", - "from pydvl.reporting.plots import shaded_mean_std\n", - "from pydvl.reporting.scores import compute_removal_score" + "from pydvl.value import LeastCoreMode, ValuationResult, compute_least_core_values" ] }, { diff --git a/notebooks/least_core_basic_new.ipynb b/notebooks/least_core_basic_new.ipynb index 1452e489e..1caacac90 100644 --- a/notebooks/least_core_basic_new.ipynb +++ b/notebooks/least_core_basic_new.ipynb @@ -68,32 +68,28 @@ { "cell_type": "code", "execution_count": null, - "id": "08ee61fd", - "metadata": { - "tags": [ - "hide" - ] - }, + "id": "a6c212a517774f6b", + "metadata": {}, "outputs": [], "source": [ "%autoreload\n", "%matplotlib inline\n", "\n", - "from __future__ import annotations\n", + "from __future__ import annotations # noqa: F404\n", "\n", "import os\n", "import random\n", "import warnings\n", + "from typing import Iterable\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", + "from numpy.typing import NDArray\n", "from sklearn.datasets import make_classification\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import mean_squared_error\n", "from tqdm.auto import tqdm, trange\n", - "from typing import Iterable\n", - "from numpy.typing import NDArray\n", "\n", "warnings.simplefilter(\"ignore\")\n", "\n", @@ -142,15 +138,15 @@ "outputs": [], "source": [ "%autoreload\n", + "from pydvl.reporting.plots import shaded_mean_std\n", "from pydvl.valuation import (\n", - " ExactLeastCoreValuation,\n", - " MonteCarloLeastCoreValuation,\n", - " ValuationResult,\n", " Dataset,\n", + " ExactLeastCoreValuation,\n", " ModelUtility,\n", + " MonteCarloLeastCoreValuation,\n", " SupervisedScorer,\n", + " ValuationResult,\n", ")\n", - "from pydvl.reporting.plots import shaded_mean_std\n", "from pydvl.valuation.types import Sample" ] }, @@ -509,7 +505,7 @@ "\n", " if len(values) != len(training_data.indices):\n", " raise ValueError(\n", - " f\"The number of values, {len(values) }, should be equal to the number of data indices, {len(training_data.indices)}\"\n", + " f\"The number of values, {len(values)}, should be equal to the number of data indices, {len(training_data.indices)}\"\n", " )\n", "\n", " scores = {}\n", diff --git a/notebooks/msr_banzhaf_digits.ipynb b/notebooks/msr_banzhaf_digits.ipynb index 046d318e2..ce453b630 100644 --- a/notebooks/msr_banzhaf_digits.ipynb +++ b/notebooks/msr_banzhaf_digits.ipynb @@ -56,9 +56,8 @@ "import os\n", "import random\n", "\n", - "import numpy as np\n", "import matplotlib.pyplot as plt\n", - "\n", + "import numpy as np\n", "from tqdm import tqdm\n", "\n", "plt.ioff() # Prevent jupyter from automatically plotting\n", @@ -90,8 +89,9 @@ "outputs": [], "source": [ "%autoreload\n", - "from pydvl.reporting.plots import plot_shapley\n", "from support.banzhaf import load_digits_dataset\n", + "\n", + "from pydvl.reporting.plots import plot_shapley\n", "from pydvl.value import *" ] }, @@ -623,7 +623,6 @@ "source": [ "from scipy.stats import norm\n", "\n", - "\n", "plot_data = anomalous_df.loc[anomalous_indices].copy()\n", "plot_data[\"original_banzhaf_value\"] = df.loc[anomalous_indices][\"banzhaf_value\"]\n", "plot_data[\"original_banzhaf_value_stderr\"] = df.loc[anomalous_indices][\n", diff --git a/notebooks/shapley_basic_spotify.ipynb b/notebooks/shapley_basic_spotify.ipynb index 4eabc22c9..09492bfaa 100644 --- a/notebooks/shapley_basic_spotify.ipynb +++ b/notebooks/shapley_basic_spotify.ipynb @@ -99,9 +99,10 @@ "outputs": [], "source": [ "%autoreload\n", + "from support.shapley import load_spotify_dataset\n", + "\n", "from pydvl.reporting.plots import plot_shapley\n", "from pydvl.utils.dataset import GroupedDataset\n", - "from support.shapley import load_spotify_dataset\n", "from pydvl.value import *" ] }, @@ -633,7 +634,7 @@ ")\n", "error_all_data = mean_absolute_error(model_all_data.predict(test_data[0]), test_data[1])\n", "\n", - "print(f\"Improvement: {100*(error_all_data - error_good_data)/error_all_data:02f}%\")" + "print(f\"Improvement: {100 * (error_all_data - error_good_data) / error_all_data:02f}%\")" ] }, { diff --git a/notebooks/shapley_knn_flowers.ipynb b/notebooks/shapley_knn_flowers.ipynb index 113cddb74..24e8479f0 100644 --- a/notebooks/shapley_knn_flowers.ipynb +++ b/notebooks/shapley_knn_flowers.ipynb @@ -67,11 +67,12 @@ "%matplotlib inline\n", "\n", "import random\n", + "from copy import deepcopy\n", + "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import sklearn as sk\n", "from sklearn import datasets\n", - "from copy import deepcopy\n", "from support.common import plot_iris\n", "\n", "plt.rcParams[\"figure.figsize\"] = (20, 8)\n", diff --git a/notebooks/shapley_utility_learning.ipynb b/notebooks/shapley_utility_learning.ipynb index d6ad5cb7c..42356714d 100644 --- a/notebooks/shapley_utility_learning.ipynb +++ b/notebooks/shapley_utility_learning.ipynb @@ -93,16 +93,16 @@ "import os\n", "import random\n", "import time\n", + "from itertools import product\n", "from warnings import simplefilter\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "from itertools import product\n", - "from sklearn.svm import LinearSVC as _LinearSVC\n", "from sklearn.datasets import load_iris\n", - "from sklearn.neural_network import MLPRegressor\n", "from sklearn.exceptions import ConvergenceWarning\n", + "from sklearn.neural_network import MLPRegressor\n", + "from sklearn.svm import LinearSVC as _LinearSVC\n", "from tqdm.auto import tqdm\n", "\n", "plt.ioff() # Prevent jupyter from automatically plotting\n", @@ -155,8 +155,8 @@ "outputs": [], "source": [ "%autoreload\n", - "from pydvl.utils import DataUtilityLearning, top_k_value_accuracy\n", "from pydvl.reporting.plots import shaded_mean_std\n", + "from pydvl.utils import DataUtilityLearning, top_k_value_accuracy\n", "from pydvl.value import *" ] }, diff --git a/notebooks/support/common.py b/notebooks/support/common.py index 6a5242047..6df112246 100644 --- a/notebooks/support/common.py +++ b/notebooks/support/common.py @@ -456,7 +456,7 @@ def corrupt_imagenet( fraction_to_corrupt: float, avg_influences: NDArray[np.float64], ) -> Tuple[pd.DataFrame, Dict[Any, List[int]]]: - """Given the preprocessed tiny imagenet dataset (or a subset of it), + """Given the preprocessed tiny imagenet dataset (or a subset of it), it takes a fraction of the images with the highest influence and (randomly) flips their labels. @@ -653,7 +653,7 @@ def load_adult_data(): data_adult = pd.read_csv( data_url, names=column_names, - sep=",\s*", + sep=r",\s*", engine="python", na_values="?", dtype=data_types, diff --git a/notebooks/support/torch.py b/notebooks/support/torch.py index 7286dea51..ac02fc828 100644 --- a/notebooks/support/torch.py +++ b/notebooks/support/torch.py @@ -211,7 +211,7 @@ def train( losses = self.load() print("Cached model found, loading...") return losses - except: + except: # noqa print(f"No pretrained model found. Training for {n_epochs} epochs:") optimizer = Adam(self.model.parameters(), lr=lr) diff --git a/public/index.html b/public/index.html index 696ded113..d94d2c16c 100644 --- a/public/index.html +++ b/public/index.html @@ -9,8 +9,7 @@

Welcome to the valuation project pages!

- \ No newline at end of file + diff --git a/pyproject.toml b/pyproject.toml index eab0270b1..3404cdf7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,11 +6,6 @@ requires = [ ] build-backend = "setuptools.build_meta" -# Black-compatible settings for isort -# See https://black.readthedocs.io/en/stable/compatible_configs.html -[tool.isort] -profile = "black" - [tool.pytest.ini_options] addopts = "--failed-first --splitting-algorithm least_duration --cov-report=term-missing --cov-report=xml --durations=30 --verbose" testpaths = [ @@ -61,16 +56,23 @@ exclude_lines = [ "^if __name__ == ['\"]__main__['\"]:$", ] -[tool.pylint.messages_control] -disable = [ - "I0011", # reasoning -] +[tool.ruff] +line-length = 88 +fix = true +target-version = "py39" -[tool.pylint.master] -load-plugins = ["pylint_json2html"] +[tool.ruff.lint] +extend-select = ["I", "F", "E", "W"] # Common linting rules from Pylint and Flake8 +ignore = [ + "F403", + "F405", + "E501", # line too long (HACK: we should really stick to 88 chars) + "E741", # Checks for 'l', 'O', or 'I' as variable names (hard to read) + "E731", # Do not assign a `lambda` expression, use a `def` + "E402" -[tool.pylint.reports] -output-format = "jsonextended" +] +isort.known-first-party = ["pydvl"] [tool.mypy] python_version = "3.9" @@ -78,6 +80,3 @@ mypy_path = './src/' ignore_missing_imports = true warn_return_any = true warn_unused_configs = true - -[tool.black] -line-length = 88 diff --git a/requirements-linting.txt b/requirements-linting.txt index 020bf63f4..7981bcb1b 100644 --- a/requirements-linting.txt +++ b/requirements-linting.txt @@ -1,4 +1,2 @@ -pylint == 3.1.0 -anybadge -pylint-json2html==0.5.0 -pre-commit==3.1.1 \ No newline at end of file +ruff >= 0.9.1 +pre-commit==3.1.1 diff --git a/src/pydvl/__init__.py b/src/pydvl/__init__.py index db1a736f4..e1f8d71c2 100644 --- a/src/pydvl/__init__.py +++ b/src/pydvl/__init__.py @@ -7,4 +7,5 @@ The two main modules you will want to look at are [value][pydvl.value] and [influence][pydvl.influence]. """ + __version__ = "0.9.3.dev0" diff --git a/src/pydvl/influence/__init__.py b/src/pydvl/influence/__init__.py index 187c98de1..a053b1caf 100644 --- a/src/pydvl/influence/__init__.py +++ b/src/pydvl/influence/__init__.py @@ -10,9 +10,10 @@ probably change. """ -from .influence_calculator import ( + +from .influence_calculator import ( # noqa: F401 DaskInfluenceCalculator, DisableClientSingleThreadCheck, SequentialInfluenceCalculator, ) -from .types import InfluenceMode +from .types import InfluenceMode # noqa: F401 diff --git a/src/pydvl/influence/array.py b/src/pydvl/influence/array.py index 7ad9a59f0..b50b18058 100644 --- a/src/pydvl/influence/array.py +++ b/src/pydvl/influence/array.py @@ -6,6 +6,7 @@ (chunked in one resp. two dimensions), with support for efficient storage and retrieval using the Zarr library. """ + from __future__ import annotations import logging diff --git a/src/pydvl/influence/base_influence_function_model.py b/src/pydvl/influence/base_influence_function_model.py index 1db6e1f9b..4c9b128f7 100644 --- a/src/pydvl/influence/base_influence_function_model.py +++ b/src/pydvl/influence/base_influence_function_model.py @@ -151,14 +151,12 @@ def influences( if x is None and y is not None: raise ValueError( - "Providing labels y, without providing model input x " - "is not supported" + "Providing labels y, without providing model input x is not supported" ) if x is not None and y is None: raise ValueError( - "Providing model input x, without providing labels y " - "is not supported" + "Providing model input x, without providing labels y is not supported" ) return self._influences(x_test, y_test, x, y, mode) @@ -446,8 +444,7 @@ def _influences( right_batch = None elif y is None: raise ValueError( - "Providing model input x, without providing labels y " - "is not supported" + "Providing model input x, without providing labels y is not supported" ) else: right_batch = self._create_batch(x, y) diff --git a/src/pydvl/influence/influence_calculator.py b/src/pydvl/influence/influence_calculator.py index 47005f105..badda0c80 100644 --- a/src/pydvl/influence/influence_calculator.py +++ b/src/pydvl/influence/influence_calculator.py @@ -385,7 +385,9 @@ def func( ): row = [] for x_chunk, y_chunk, chunk_size in zip( - x.to_delayed(), y.to_delayed(), x_chunk_sizes # type:ignore + x.to_delayed(), + y.to_delayed(), + x_chunk_sizes, # type:ignore ): if mode == InfluenceMode.Up: block_shape = (test_chunk_size, chunk_size) diff --git a/src/pydvl/influence/torch/__init__.py b/src/pydvl/influence/torch/__init__.py index 417910de0..980660c35 100644 --- a/src/pydvl/influence/torch/__init__.py +++ b/src/pydvl/influence/torch/__init__.py @@ -1,4 +1,4 @@ -from .influence_function_model import ( +from .influence_function_model import ( # noqa: F401 ArnoldiInfluence, CgInfluence, DirectInfluence, @@ -7,5 +7,5 @@ LissaInfluence, NystroemSketchInfluence, ) -from .preconditioner import JacobiPreconditioner, NystroemPreconditioner -from .util import BlockMode, SecondOrderMode +from .preconditioner import JacobiPreconditioner, NystroemPreconditioner # noqa: F401 +from .util import BlockMode, SecondOrderMode # noqa: F401 diff --git a/src/pydvl/influence/torch/base.py b/src/pydvl/influence/torch/base.py index 7ae0c7006..ccdf96c42 100644 --- a/src/pydvl/influence/torch/base.py +++ b/src/pydvl/influence/torch/base.py @@ -6,7 +6,6 @@ from typing import ( TYPE_CHECKING, Dict, - Generic, Iterable, List, Optional, @@ -23,14 +22,11 @@ from ..base_influence_function_model import ComposableInfluence from ..types import ( Batch, - BatchType, BilinearForm, BlockMapper, GradientProvider, - GradientProviderType, Operator, OperatorGradientComposition, - TensorType, ) from .util import ( BlockMode, @@ -528,7 +524,6 @@ def _validate_tensor_input(self, tensor: torch.Tensor) -> None: ) def _apply(self, tensor: torch.Tensor) -> torch.Tensor: - if tensor.ndim == 2: return self._apply_to_mat(tensor.to(self.device)) diff --git a/src/pydvl/influence/torch/batch_operation.py b/src/pydvl/influence/torch/batch_operation.py index e20abefd9..e58d66c6b 100644 --- a/src/pydvl/influence/torch/batch_operation.py +++ b/src/pydvl/influence/torch/batch_operation.py @@ -13,6 +13,7 @@ which is useful in the case that keeping $B$ in memory is not feasible. """ + from __future__ import annotations from abc import ABC, abstractmethod @@ -73,7 +74,6 @@ def to(self, device: torch.device): def apply_to_dict( self, batch: TorchBatch, mat_dict: Dict[str, torch.Tensor] ) -> Dict[str, torch.Tensor]: - if mat_dict.keys() != self.params_to_restrict_to.keys(): raise ValueError( "The keys of the matrix dictionary must match the keys of the " @@ -205,7 +205,6 @@ def _apply_to_vec(self, batch: TorchBatch, vec: torch.Tensor) -> torch.Tensor: def _apply_to_dict( self, batch: TorchBatch, mat_dict: Dict[str, torch.Tensor] ) -> Dict[str, torch.Tensor]: - func = self._create_seq_func(*batch) if self._has_batch_dim_dict(mat_dict): @@ -484,7 +483,6 @@ def _inverse_rank_one_update( def _generate_inverse_rank_one_updates( x: List[torch.Tensor], v: List[torch.Tensor], regularization: float ) -> Generator[torch.Tensor, None, None]: - x_v_iterator = enumerate(zip(x, v)) index, (x_, v_) = next(x_v_iterator) diff --git a/src/pydvl/influence/torch/functional.py b/src/pydvl/influence/torch/functional.py index f07cc3983..64f703fbb 100644 --- a/src/pydvl/influence/torch/functional.py +++ b/src/pydvl/influence/torch/functional.py @@ -29,7 +29,7 @@ import warnings from dataclasses import dataclass from functools import partial -from typing import TYPE_CHECKING, Callable, Dict, Optional, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, Optional, Union import torch from scipy.sparse.linalg import ArpackNoConvergence diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py index 9e7d96325..793d4cfe8 100644 --- a/src/pydvl/influence/torch/influence_function_model.py +++ b/src/pydvl/influence/torch/influence_function_model.py @@ -35,7 +35,6 @@ HessianBatchOperation, ) from .functional import ( - create_hvp_function, create_per_sample_gradient_function, create_per_sample_mixed_derivative_function, gauss_newton, @@ -1437,7 +1436,6 @@ def _create_block( data: DataLoader, regularization: Optional[float], ) -> TorchOperatorGradientComposition: - assert regularization is not None regularization = cast(float, regularization) diff --git a/src/pydvl/influence/torch/operator.py b/src/pydvl/influence/torch/operator.py index 9941712d0..afdb63f16 100644 --- a/src/pydvl/influence/torch/operator.py +++ b/src/pydvl/influence/torch/operator.py @@ -9,7 +9,6 @@ from .base import ( LowRankBilinearForm, - OperatorBilinearForm, TensorDictOperator, TensorOperator, TorchBatch, @@ -25,7 +24,6 @@ ) from .functional import LowRankProductRepresentation from .preconditioner import Preconditioner -from .util import LossType logger = logging.getLogger(__name__) @@ -62,7 +60,6 @@ def input_dict_structure(self) -> Dict[str, Tuple[int, ...]]: return self.batch_operation.input_dict_structure def _apply_to_dict(self, mat: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: - tensor_dicts = ( self.batch_operation.apply_to_dict(TorchBatch(x, y), mat) for x, y in self.dataloader @@ -387,7 +384,6 @@ def __init__( progress: bool = False, warn_on_max_iteration: bool = True, ): - if regularization is not None and regularization < 0: raise ValueError("regularization must be non-negative") @@ -457,8 +453,8 @@ def _apply_to_vec(self, vec: torch.Tensor) -> torch.Tensor: mean_residual = torch.mean(torch.abs(residual / h_estimate)) logger.debug( f"Terminated Lissa after {k} iterations with " - f"{max_residual*100:.2f} % max residual and" - f" mean residual {mean_residual*100:.5f} %" + f"{max_residual * 100:.2f} % max residual and" + f" mean residual {mean_residual * 100:.5f} %" ) is_converged = True break @@ -470,8 +466,8 @@ def _apply_to_vec(self, vec: torch.Tensor) -> torch.Tensor: log_level, f"Reached max number of iterations {self.maxiter} without " f"achieving the desired tolerance {self.rtol}.\n " - f"Achieved max residual {max_residual*100:.2f} % and" - f" {mean_residual*100:.5f} % mean residual", + f"Achieved max residual {max_residual * 100:.2f} % and" + f" {mean_residual * 100:.5f} % mean residual", ) return h_estimate / self.scale @@ -510,7 +506,6 @@ def __init__( regularization: Optional[float] = None, exact: bool = True, ): - if exact and (regularization is None or regularization <= 0): raise ValueError("regularization must be positive when exact=True") elif regularization is not None and regularization < 0: @@ -551,14 +546,12 @@ def to(self, device: torch.device): return self def _apply_to_vec(self, vec: torch.Tensor) -> torch.Tensor: - if vec.ndim == 1: return self._apply_to_mat(vec.unsqueeze(0)).squeeze() return self._apply_to_mat(vec) def _apply_to_mat(self, mat: torch.Tensor) -> torch.Tensor: - D = self._low_rank_representation.eigen_vals.clone() V = self._low_rank_representation.projections @@ -652,7 +645,6 @@ def __init__( use_block_cg: bool = False, warn_on_max_iteration: bool = True, ): - if regularization is not None and regularization < 0: raise ValueError("regularization must be non-negative") @@ -713,7 +705,6 @@ def _apply_to_vec(self, vec: torch.Tensor) -> torch.Tensor: return self._apply_to_mat(vec.unsqueeze(0)) def _apply_to_mat(self, mat: torch.Tensor) -> torch.Tensor: - if self.use_block_cg: return self._solve_pbcg(mat) @@ -740,7 +731,6 @@ def _solve_pcg( b: torch.Tensor, tol: float, ) -> torch.Tensor: - x0 = torch.clone(b) maxiter = self.maxiter if maxiter is None: @@ -797,7 +787,6 @@ def _solve_pbcg( self, rhs: torch.Tensor, ): - # The block variant of conjugate gradient is known to suffer from breakdown, # due to the possibility of rank deficiency of the iterates of the parameter # matrix P^tAP, which destabilizes the direct solver. diff --git a/src/pydvl/influence/torch/preconditioner.py b/src/pydvl/influence/torch/preconditioner.py index 432ec81c5..69f149ded 100644 --- a/src/pydvl/influence/torch/preconditioner.py +++ b/src/pydvl/influence/torch/preconditioner.py @@ -25,6 +25,7 @@ class Preconditioner(ABC): condition number than $A + \lambda \operatorname{I}$. """ + _reg: Optional[float] @property @@ -256,7 +257,6 @@ def _fit( self._reg = regularization def _solve(self, rhs: torch.Tensor): - rhs_is_one_dim = rhs.ndim == 1 b = torch.atleast_2d(rhs).t() if rhs_is_one_dim else rhs diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py index c3c74bedf..c1b66ac93 100644 --- a/src/pydvl/influence/torch/util.py +++ b/src/pydvl/influence/torch/util.py @@ -507,6 +507,7 @@ class EkfacRepresentation: evecs_g: The g eigenvectors of the ekfac representation. diags: The diagonal elements of the factorized Hessian matrix. """ + layer_names: Iterable[str] layers_module: Iterable[torch.nn.Module] evecs_a: Iterable[torch.Tensor] diff --git a/src/pydvl/influence/types.py b/src/pydvl/influence/types.py index 35c220dbf..855d70eee 100644 --- a/src/pydvl/influence/types.py +++ b/src/pydvl/influence/types.py @@ -48,7 +48,6 @@ from enum import Enum from typing import ( Collection, - Dict, Generator, Generic, Iterable, diff --git a/src/pydvl/parallel/__init__.py b/src/pydvl/parallel/__init__.py index a3e776943..591f53f30 100644 --- a/src/pydvl/parallel/__init__.py +++ b/src/pydvl/parallel/__init__.py @@ -30,7 +30,7 @@ !!! tip "Passsing large objects" When running tasks which accept heavy inputs, it is important to first use `put()` on the object and use the returned reference - as argument to the callable within `submit()`. For example: + as argument to the callable within `submit()`. For example: ```python u_ref = parallel_backend.put(u) ... @@ -44,8 +44,9 @@ uses joblib's higher level API with `Parallel()` which then indirectly also supports the use of Dask and Ray. """ + # HACK to avoid circular imports -from ..utils.types import * # pylint: disable=wrong-import-order +from ..utils.types import * from .backend import * from .backends import * from .config import * diff --git a/src/pydvl/parallel/backend.py b/src/pydvl/parallel/backend.py index 52f08a46c..38c9761f2 100644 --- a/src/pydvl/parallel/backend.py +++ b/src/pydvl/parallel/backend.py @@ -66,24 +66,19 @@ def executor( ... @abstractmethod - def get(self, v: Any, *args, **kwargs): - ... + def get(self, v: Any, *args, **kwargs): ... @abstractmethod - def put(self, v: Any, *args, **kwargs) -> Any: - ... + def put(self, v: Any, *args, **kwargs) -> Any: ... @abstractmethod - def wrap(self, fun: Callable, **kwargs) -> Callable: - ... + def wrap(self, fun: Callable, **kwargs) -> Callable: ... @abstractmethod - def wait(self, v: Any, *args, **kwargs) -> Any: - ... + def wait(self, v: Any, *args, **kwargs) -> Any: ... @abstractmethod - def _effective_n_jobs(self, n_jobs: int) -> int: - ... + def _effective_n_jobs(self, n_jobs: int) -> int: ... def effective_n_jobs(self, n_jobs: int = -1) -> int: if n_jobs == 0: diff --git a/src/pydvl/parallel/map_reduce.py b/src/pydvl/parallel/map_reduce.py index 937674141..a55e07733 100644 --- a/src/pydvl/parallel/map_reduce.py +++ b/src/pydvl/parallel/map_reduce.py @@ -6,6 +6,7 @@ This interface might be deprecated or changed in a future release before 1.0 """ + import warnings from functools import reduce from itertools import accumulate, repeat diff --git a/src/pydvl/reporting/scores.py b/src/pydvl/reporting/scores.py index 5b1c09f07..94902e461 100644 --- a/src/pydvl/reporting/scores.py +++ b/src/pydvl/reporting/scores.py @@ -38,7 +38,7 @@ def compute_removal_score( if len(values) != len(u.data.indices): raise ValueError( - f"The number of values, {len(values) }, should be equal to the number of data indices, {len(u.data.indices)}" + f"The number of values, {len(values)}, should be equal to the number of data indices, {len(u.data.indices)}" ) scores = {} diff --git a/src/pydvl/utils/caching/__init__.py b/src/pydvl/utils/caching/__init__.py index dcf3118db..5b50d4fdb 100644 --- a/src/pydvl/utils/caching/__init__.py +++ b/src/pydvl/utils/caching/__init__.py @@ -83,6 +83,7 @@ [ignore_args][pydvl.utils.caching.config.CachedFuncConfig] option in the configuration. """ + from .base import * from .config import * from .disk import * diff --git a/src/pydvl/utils/caching/memcached.py b/src/pydvl/utils/caching/memcached.py index 0eb348f68..b29b0de97 100644 --- a/src/pydvl/utils/caching/memcached.py +++ b/src/pydvl/utils/caching/memcached.py @@ -182,8 +182,7 @@ def _connect(config: MemcachedClientConfig) -> RetryingClient: raise except AssertionError as e: logger.error( # type: ignore - f"@memcached: Failure saving dummy value " - f"to {config.server}: {str(e)}" + f"@memcached: Failure saving dummy value to {config.server}: {str(e)}" ) raise diff --git a/src/pydvl/utils/numeric.py b/src/pydvl/utils/numeric.py index 6b6533508..a310c54f7 100644 --- a/src/pydvl/utils/numeric.py +++ b/src/pydvl/utils/numeric.py @@ -2,6 +2,7 @@ This module contains routines for numerical computations used across the library. """ + from __future__ import annotations from itertools import chain, combinations @@ -273,8 +274,7 @@ def random_matrix_with_condition_number( @overload def running_moments( previous_avg: float, previous_variance: float, count: int, new_value: float -) -> Tuple[float, float]: - ... +) -> Tuple[float, float]: ... @overload @@ -283,8 +283,7 @@ def running_moments( previous_variance: NDArray[np.float64], count: int, new_value: NDArray[np.float64], -) -> Tuple[NDArray[np.float64], NDArray[np.float64]]: - ... +) -> Tuple[NDArray[np.float64], NDArray[np.float64]]: ... def running_moments( diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py index 86ce6b426..1b6a342a9 100644 --- a/src/pydvl/utils/progress.py +++ b/src/pydvl/utils/progress.py @@ -57,7 +57,7 @@ def wrapper_log_duration(*args, **kwargs): duration = time() - start_time logger.log( log_level, - f"Function '{func_name}' completed. " f"Duration: {duration:.2f} sec", + f"Function '{func_name}' completed. Duration: {duration:.2f} sec", ) return result diff --git a/src/pydvl/utils/score.py b/src/pydvl/utils/score.py index 05b47266b..06020223f 100644 --- a/src/pydvl/utils/score.py +++ b/src/pydvl/utils/score.py @@ -6,7 +6,7 @@ are typically used by the [Utility][pydvl.utils.utility.Utility] class to evaluate the quality of a model when trained on subsets of the training data. -Scorers can be constructed in the same way as in scikit-learn: either from +Scorers can be constructed in the same way as in scikit-learn: either from known strings or from a callable. Greater values must be better. If they are not, a negated version can be used, see scikit-learn's [make_scorer()](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html). @@ -17,6 +17,7 @@ [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley]) to estimate the number of samples required for a certain quality of approximation. """ + from typing import Callable, Optional, Protocol, Tuple, Union import numpy as np @@ -38,8 +39,7 @@ class ScorerCallable(Protocol): """Signature for a scorer""" - def __call__(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: - ... + def __call__(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: ... class Scorer: diff --git a/src/pydvl/utils/types.py b/src/pydvl/utils/types.py index 563d28150..2dc344e22 100644 --- a/src/pydvl/utils/types.py +++ b/src/pydvl/utils/types.py @@ -1,4 +1,4 @@ -""" This module contains types, protocols, decorators and generic function +"""This module contains types, protocols, decorators and generic function transformations. Some of it probably belongs elsewhere. """ @@ -29,18 +29,15 @@ class MapFunction(Protocol[R]): - def __call__(self, *args: Any, **kwargs: Any) -> R: - ... + def __call__(self, *args: Any, **kwargs: Any) -> R: ... class ReduceFunction(Protocol[R]): - def __call__(self, *args: Any, **kwargs: Any) -> R: - ... + def __call__(self, *args: Any, **kwargs: Any) -> R: ... class LossFunction(Protocol): - def __call__(self, y_true: NDArray, y_pred: NDArray) -> NDArray: - ... + def __call__(self, y_true: NDArray, y_pred: NDArray) -> NDArray: ... @runtime_checkable @@ -97,7 +94,7 @@ def score(self, x: NDArray, y: NDArray | None) -> float: def ensure_seed_sequence( - seed: Optional[Union[Seed, SeedSequence]] = None + seed: Optional[Union[Seed, SeedSequence]] = None, ) -> SeedSequence: """ If the passed seed is a SeedSequence object then it is returned as is. If it is diff --git a/src/pydvl/utils/utility.py b/src/pydvl/utils/utility.py index b1426afc2..cb0810f0a 100644 --- a/src/pydvl/utils/utility.py +++ b/src/pydvl/utils/utility.py @@ -23,7 +23,7 @@ learning](https://arxiv.org/abs/2107.06336). arXiv preprint arXiv:2107.06336. """ -import hashlib + import logging import warnings from typing import Dict, FrozenSet, Iterable, Optional, Tuple, Union, cast diff --git a/src/pydvl/valuation/base.py b/src/pydvl/valuation/base.py index 2e628448b..1c22469b5 100644 --- a/src/pydvl/valuation/base.py +++ b/src/pydvl/valuation/base.py @@ -15,8 +15,7 @@ def __init__(self) -> None: self.result: ValuationResult | None = None @abstractmethod - def fit(self, data: Dataset): - ... + def fit(self, data: Dataset): ... def values(self, sort: bool = False) -> ValuationResult: """Returns a copy of the valuation result. diff --git a/src/pydvl/valuation/games.py b/src/pydvl/valuation/games.py index 0c0a776a2..b5bb3b3e3 100644 --- a/src/pydvl/valuation/games.py +++ b/src/pydvl/valuation/games.py @@ -169,8 +169,7 @@ def least_core_values(self) -> ValuationResult: ) @abstractmethod - def _score(self, X: NDArray) -> float: - ... + def _score(self, X: NDArray) -> float: ... def __repr__(self) -> str: return f"{self.__class__.__name__}(n_players={self.n_players})" @@ -729,6 +728,6 @@ def _exact_a_lb(n_players): ) else: raise NotImplementedError( - f"Exact A_lb matrix is not implemented for more than 4 players." + "Exact A_lb matrix is not implemented for more than 4 players." ) return a_lb.astype(float) diff --git a/src/pydvl/valuation/methods/_solve_least_core_problems.py b/src/pydvl/valuation/methods/_solve_least_core_problems.py index 555f3f5a4..b491112ee 100644 --- a/src/pydvl/valuation/methods/_solve_least_core_problems.py +++ b/src/pydvl/valuation/methods/_solve_least_core_problems.py @@ -6,7 +6,6 @@ import cvxpy as cp import numpy as np -from joblib import Parallel, delayed from numpy.typing import NDArray from pydvl.utils import Status @@ -18,7 +17,6 @@ "_solve_least_core_linear_program", "_solve_egalitarian_least_core_quadratic_program", "lc_solve_problem", - "lc_solve_problems", "LeastCoreProblem", ] diff --git a/src/pydvl/valuation/methods/data_banzhaf.py b/src/pydvl/valuation/methods/data_banzhaf.py index 687e13d50..c9b237575 100644 --- a/src/pydvl/valuation/methods/data_banzhaf.py +++ b/src/pydvl/valuation/methods/data_banzhaf.py @@ -24,6 +24,7 @@ 26th International Conference on Artificial Intelligence and Statistics, 6388–6421. PMLR, 2023. """ + from pydvl.valuation.methods.semivalue import SemivalueValuation __all__ = ["DataBanzhafValuation"] diff --git a/src/pydvl/valuation/methods/gt_shapley.py b/src/pydvl/valuation/methods/gt_shapley.py index 65fee7b3d..83749ea4a 100644 --- a/src/pydvl/valuation/methods/gt_shapley.py +++ b/src/pydvl/valuation/methods/gt_shapley.py @@ -143,7 +143,7 @@ def fit(self, data: Dataset) -> Self: def compute_n_samples(epsilon: float, delta: float, n_obs: int) -> int: - """Compute the minimal sample size with epsilon-delta guarantees. + r"""Compute the minimal sample size with epsilon-delta guarantees. Based on the formula in Theorem 4 of (Jia, R. et al., 2023)2 diff --git a/src/pydvl/valuation/methods/knn_shapley.py b/src/pydvl/valuation/methods/knn_shapley.py index f2b26f3c9..d91e80793 100644 --- a/src/pydvl/valuation/methods/knn_shapley.py +++ b/src/pydvl/valuation/methods/knn_shapley.py @@ -13,6 +13,7 @@ the VLDB Endowment, Vol. 12, No. 11, pp. 1610–1623. """ + from __future__ import annotations import numpy as np diff --git a/src/pydvl/valuation/methods/least_core.py b/src/pydvl/valuation/methods/least_core.py index 7db5f0fd1..e3933bce3 100644 --- a/src/pydvl/valuation/methods/least_core.py +++ b/src/pydvl/valuation/methods/least_core.py @@ -134,13 +134,13 @@ class ExactLeastCoreValuation(LeastCoreValuation): $$ \begin{array}{lll} - \text{minimize} & \displaystyle{e} & \\ - \text{subject to} & \displaystyle\sum_{i\in N} x_{i} = v(N) & \\ - & \displaystyle\sum_{i\in S} x_{i} + e \geq v(S) &, \forall S \subseteq N \\ - \end{array} + \text{minimize} & \\displaystyle{e} & \\ + \text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\ + & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) &, \forall S \\subseteq N \\ + \\end{array} $$ - Where $N = \{1, 2, \dots, n\}$ are the training set's indices. + Where $N = \\{1, 2, \\dots, n\\}$ are the training set's indices. Args: utility: Utility object with model, data and scoring function. @@ -184,11 +184,11 @@ class MonteCarloLeastCoreValuation(LeastCoreValuation): $$ \begin{array}{lll} - \text{minimize} & \displaystyle{e} & \\ - \text{subject to} & \displaystyle\sum_{i\in N} x_{i} = v(N) & \\ - & \displaystyle\sum_{i\in S} x_{i} + e \geq v(S) & , - \forall S \in \{S_1, S_2, \dots, S_m \overset{\mathrm{iid}}{\sim} U(2^N) \} - \end{array} + \text{minimize} & \\displaystyle{e} & \\ + \text{subject to} & \\displaystyle\\sum_{i\\in N} x_{i} = v(N) & \\ + & \\displaystyle\\sum_{i\\in S} x_{i} + e \\geq v(S) & , + \forall S \\in \\{S_1, S_2, \\dots, S_m \\overset{\\mathrm{iid}}{\\sim} U(2^N) \\} + \\end{array} $$ Where: diff --git a/src/pydvl/valuation/methods/loo.py b/src/pydvl/valuation/methods/loo.py index 639ebf86f..7267371e6 100644 --- a/src/pydvl/valuation/methods/loo.py +++ b/src/pydvl/valuation/methods/loo.py @@ -5,7 +5,7 @@ defined as: $$ -v_\text{LOO}(i) = U(I) - U(I \setminus \{i\}), +v_\text{LOO}(i) = U(I) - U(I \\setminus \\{i\\}), $$ where $U$ is the utility function, $I$ is the set of all data points, and $i$ is the @@ -14,6 +14,7 @@ Strictly speaking, LOO can be seen as a [semivalue][pydvl.valuation.semivalue] where the coefficients are zero except for $k=|D|-1$, """ + from __future__ import annotations from pydvl.valuation.methods.semivalue import SemivalueValuation diff --git a/src/pydvl/valuation/methods/msr_banzhaf.py b/src/pydvl/valuation/methods/msr_banzhaf.py index 28c867ffe..32fc30092 100644 --- a/src/pydvl/valuation/methods/msr_banzhaf.py +++ b/src/pydvl/valuation/methods/msr_banzhaf.py @@ -11,6 +11,7 @@ Statistics, pp. 6388-6421. """ + from __future__ import annotations import numpy as np diff --git a/src/pydvl/valuation/methods/owen_shapley.py b/src/pydvl/valuation/methods/owen_shapley.py index 1cbd6e824..94c29d52a 100644 --- a/src/pydvl/valuation/methods/owen_shapley.py +++ b/src/pydvl/valuation/methods/owen_shapley.py @@ -7,7 +7,6 @@ from pydvl.utils import Status from pydvl.valuation.dataset import Dataset from pydvl.valuation.methods.semivalue import SemivalueValuation -from pydvl.valuation.result import ValuationResult from pydvl.valuation.samplers.powerset import OwenSampler from pydvl.valuation.stopping import NoStopping from pydvl.valuation.utility.base import UtilityBase diff --git a/src/pydvl/valuation/methods/twodshapley.py b/src/pydvl/valuation/methods/twodshapley.py index 42f0fac4b..59262c950 100644 --- a/src/pydvl/valuation/methods/twodshapley.py +++ b/src/pydvl/valuation/methods/twodshapley.py @@ -11,6 +11,7 @@ 40th International Conference on Machine Learning, 21730–55. PMLR, 2023. """ + from __future__ import annotations import hashlib diff --git a/src/pydvl/valuation/result.py b/src/pydvl/valuation/result.py index 5fea62f1c..19ff17437 100644 --- a/src/pydvl/valuation/result.py +++ b/src/pydvl/valuation/result.py @@ -43,6 +43,7 @@ samples random values uniformly. """ + from __future__ import annotations import collections.abc @@ -374,16 +375,13 @@ def __getattr__(self, attr: str) -> Any: ) from e @overload - def __getitem__(self, key: int) -> ValueItem: - ... + def __getitem__(self, key: int) -> ValueItem: ... @overload - def __getitem__(self, key: slice) -> List[ValueItem]: - ... + def __getitem__(self, key: slice) -> List[ValueItem]: ... @overload - def __getitem__(self, key: Iterable[int]) -> List[ValueItem]: - ... + def __getitem__(self, key: Iterable[int]) -> List[ValueItem]: ... def __getitem__( self, key: Union[slice, Iterable[int], int] @@ -409,16 +407,13 @@ def __getitem__( raise TypeError("Indices must be integers, iterable or slices") @overload - def __setitem__(self, key: int, value: ValueItem) -> None: - ... + def __setitem__(self, key: int, value: ValueItem) -> None: ... @overload - def __setitem__(self, key: slice, value: ValueItem) -> None: - ... + def __setitem__(self, key: slice, value: ValueItem) -> None: ... @overload - def __setitem__(self, key: Iterable[int], value: ValueItem) -> None: - ... + def __setitem__(self, key: Iterable[int], value: ValueItem) -> None: ... def __setitem__( self, key: Union[slice, Iterable[int], int], value: ValueItem @@ -593,7 +588,7 @@ def __add__(self, other: ValuationResult) -> ValuationResult: other_shared_names = np.take(other_names, both_pos) if np.any(this_shared_names != other_shared_names): - raise ValueError(f"Mismatching names in ValuationResults") + raise ValueError("Mismatching names in ValuationResults") names = np.empty_like(indices, dtype=self._names.dtype) names[this_pos] = self._names diff --git a/src/pydvl/valuation/samplers/__init__.py b/src/pydvl/valuation/samplers/__init__.py index 3ad240599..07c217a73 100644 --- a/src/pydvl/valuation/samplers/__init__.py +++ b/src/pydvl/valuation/samplers/__init__.py @@ -79,6 +79,7 @@ def fit(self, data: Dataset): Approximation for Data Evaluation](https://doi.org/10.48550/arXiv.2311.05346). arXiv, 9 November 2023. """ + from typing import Union from .base import * diff --git a/src/pydvl/valuation/samplers/base.py b/src/pydvl/valuation/samplers/base.py index eb5b1e12c..292b0d443 100644 --- a/src/pydvl/valuation/samplers/base.py +++ b/src/pydvl/valuation/samplers/base.py @@ -19,7 +19,6 @@ NullaryPredicate, SampleBatch, SampleGenerator, - ValueUpdate, ValueUpdateT, ) from pydvl.valuation.utility.base import UtilityBase diff --git a/src/pydvl/valuation/samplers/classwise.py b/src/pydvl/valuation/samplers/classwise.py index c7c3f37ad..7b9161672 100644 --- a/src/pydvl/valuation/samplers/classwise.py +++ b/src/pydvl/valuation/samplers/classwise.py @@ -24,7 +24,7 @@ def roundrobin( - batch_generators: Mapping[U, Iterable[V]] + batch_generators: Mapping[U, Iterable[V]], ) -> Generator[tuple[U, V], None, None]: """Taken samples from batch generators in order until all of them are exhausted. diff --git a/src/pydvl/valuation/samplers/powerset.py b/src/pydvl/valuation/samplers/powerset.py index 7bfe5deca..3821a8557 100644 --- a/src/pydvl/valuation/samplers/powerset.py +++ b/src/pydvl/valuation/samplers/powerset.py @@ -33,7 +33,7 @@ import logging from abc import ABC, abstractmethod -from typing import Callable, Collection, Generator, Iterable, Type, cast +from typing import Callable, Generator, Iterable, Type import numpy as np from numpy.typing import NDArray @@ -79,13 +79,11 @@ def __init__(self, indices: NDArray[IndexT]): self._indices = indices @abstractmethod - def __iter__(self) -> Generator[IndexT | None, None, None]: - ... + def __iter__(self) -> Generator[IndexT | None, None, None]: ... @staticmethod @abstractmethod - def length(indices: IndexSetT) -> int | None: - ... + def length(indices: IndexSetT) -> int | None: ... class SequentialIndexIteration(IndexIteration): diff --git a/src/pydvl/valuation/scorers/base.py b/src/pydvl/valuation/scorers/base.py index 772d0229a..a8c9162ba 100644 --- a/src/pydvl/valuation/scorers/base.py +++ b/src/pydvl/valuation/scorers/base.py @@ -29,5 +29,4 @@ class Scorer(ABC): range: NDArray[np.float_] @abstractmethod - def __call__(self, model) -> float: - ... + def __call__(self, model) -> float: ... diff --git a/src/pydvl/valuation/scorers/classwise.py b/src/pydvl/valuation/scorers/classwise.py index eb50fded8..58ff17eea 100644 --- a/src/pydvl/valuation/scorers/classwise.py +++ b/src/pydvl/valuation/scorers/classwise.py @@ -68,7 +68,7 @@ class ClasswiseSupervisedScorer(SupervisedScorer): to discount the out-of-class score. rescale_scores: If set to True, the scores will be denormalized. This is particularly useful when the inner score function $a_S$ is calculated by - an estimator of the form $\frac{1}{N} \sum_i x_i$. + an estimator of the form $\frac{1}{N} \\sum_i x_i$. name: Name of the scorer. If not provided, the name of the inner scoring function will be prefixed by `classwise `. diff --git a/src/pydvl/valuation/scorers/supervised.py b/src/pydvl/valuation/scorers/supervised.py index a99296240..770cdf333 100644 --- a/src/pydvl/valuation/scorers/supervised.py +++ b/src/pydvl/valuation/scorers/supervised.py @@ -33,8 +33,7 @@ class SupervisedScorerCallable(Protocol): def __call__( self, model: SupervisedModel, X: NDArray[Any], y: NDArray[Any] - ) -> float: - ... + ) -> float: ... class SupervisedScorer(Scorer): diff --git a/src/pydvl/valuation/scorers/utils.py b/src/pydvl/valuation/scorers/utils.py index 8f0686372..53b7151f2 100644 --- a/src/pydvl/valuation/scorers/utils.py +++ b/src/pydvl/valuation/scorers/utils.py @@ -1,9 +1,8 @@ -from typing import Callable, Tuple, Type +from typing import Callable from scipy.special import expit from pydvl.utils.types import SupervisedModel -from pydvl.valuation.dataset import Dataset from pydvl.valuation.scorers.supervised import SupervisedScorer __all__ = ["compose_score", "sigmoid"] diff --git a/src/pydvl/valuation/stopping.py b/src/pydvl/valuation/stopping.py index 4374b909d..063ada439 100644 --- a/src/pydvl/valuation/stopping.py +++ b/src/pydvl/valuation/stopping.py @@ -147,8 +147,7 @@ class StoppingCriterionCallable(Protocol): """Signature for a stopping criterion""" - def __call__(self, result: ValuationResult) -> Status: - ... + def __call__(self, result: ValuationResult) -> Status: ... class StoppingCriterion(abc.ABC): @@ -472,7 +471,7 @@ def completion(self) -> float: return 0.0 def __str__(self) -> str: - return f"NoStopping()" + return "NoStopping()" class MinUpdates(StoppingCriterion): diff --git a/src/pydvl/valuation/types.py b/src/pydvl/valuation/types.py index ecd8ba77f..0defd73d0 100644 --- a/src/pydvl/valuation/types.py +++ b/src/pydvl/valuation/types.py @@ -152,5 +152,4 @@ def __iter__(self): # No way to type the return Iterator properly class LossFunction(Protocol): - def __call__(self, y_true: NDArray, y_pred: NDArray) -> NDArray: - ... + def __call__(self, y_true: NDArray, y_pred: NDArray) -> NDArray: ... diff --git a/src/pydvl/valuation/utility/modelutility.py b/src/pydvl/valuation/utility/modelutility.py index 47b938cd8..b9f76d66e 100644 --- a/src/pydvl/valuation/utility/modelutility.py +++ b/src/pydvl/valuation/utility/modelutility.py @@ -10,7 +10,7 @@ from pydvl.utils.caching import CacheBackend, CachedFuncConfig, CacheStats from pydvl.utils.types import BaseModel from pydvl.valuation.scorers import Scorer -from pydvl.valuation.types import Sample, SampleT +from pydvl.valuation.types import SampleT __all__ = ["ModelUtility"] diff --git a/src/pydvl/valuation/utils.py b/src/pydvl/valuation/utils.py index 61de29710..eb22b9876 100644 --- a/src/pydvl/valuation/utils.py +++ b/src/pydvl/valuation/utils.py @@ -2,6 +2,7 @@ import uuid from abc import ABC, abstractmethod +from contextlib import contextmanager from multiprocessing import shared_memory from joblib._parallel_backends import ( @@ -33,23 +34,19 @@ def ensure_backend_has_generator_return(): class Flag(ABC): @abstractmethod - def set(self): - ... + def set(self): ... @abstractmethod - def reset(self): - ... + def reset(self): ... @abstractmethod - def __call__(self): - ... + def __call__(self): ... def __bool__(self): # some syntactic sugar return self.__call__() @abstractmethod - def unlink(self): - ... + def unlink(self): ... class ThreadingFlag(Flag): @@ -94,9 +91,6 @@ def unlink(self): self._flag.unlink() -from contextlib import contextmanager - - @contextmanager def make_parallel_flag(): backend = _get_active_backend()[0] diff --git a/src/pydvl/value/__init__.py b/src/pydvl/value/__init__.py index 891582d7d..ca5b71917 100644 --- a/src/pydvl/value/__init__.py +++ b/src/pydvl/value/__init__.py @@ -15,12 +15,12 @@ warnings.warn(msg, FutureWarning) -from .result import * # isort: skip -from ..utils import Dataset, Scorer, Utility -from .least_core import * -from .loo import * -from .oob import * -from .sampler import * -from .semivalues import * -from .shapley import * -from .stopping import * +from ..utils import Dataset, Scorer, Utility # noqa +from .least_core import * # noqa: E402 +from .loo import * # noqa: E402 +from .oob import * # noqa: E402 +from .result import * # noqa: E402 +from .sampler import * # noqa: E402 +from .semivalues import * # noqa: E402 +from .shapley import * # noqa: E402 +from .stopping import * # noqa: E402 diff --git a/src/pydvl/value/games.py b/src/pydvl/value/games.py index 84fe9cbac..dc9567053 100644 --- a/src/pydvl/value/games.py +++ b/src/pydvl/value/games.py @@ -11,6 +11,7 @@ Computers & Operations Research, 36(5), pp.1726-1730. """ + from __future__ import annotations from abc import ABC, abstractmethod @@ -148,8 +149,7 @@ def least_core_values(self) -> ValuationResult: ) @abstractmethod - def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: - ... + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: ... def __repr__(self) -> str: return f"{self.__class__.__name__}(n_players={self.n_players})" diff --git a/src/pydvl/value/least_core/__init__.py b/src/pydvl/value/least_core/__init__.py index 02007f8bd..ce0078c1e 100644 --- a/src/pydvl/value/least_core/__init__.py +++ b/src/pydvl/value/least_core/__init__.py @@ -20,7 +20,7 @@ case. The solution of the linear systems can then be done in parallel. """ -import warnings + from enum import Enum from typing import Optional diff --git a/src/pydvl/value/least_core/common.py b/src/pydvl/value/least_core/common.py index d7842ed7e..5b162f9b7 100644 --- a/src/pydvl/value/least_core/common.py +++ b/src/pydvl/value/least_core/common.py @@ -199,21 +199,21 @@ def _map_func( parallel_backend = _maybe_init_parallel_backend(parallel_backend, config) - map_reduce_job: MapReduceJob[ - "LeastCoreProblem", "List[ValuationResult]" - ] = MapReduceJob( - inputs=problems, - map_func=_map_func, - map_kwargs=dict( - u=u, - algorithm=algorithm, - non_negative_subsidy=non_negative_subsidy, - solver_options=solver_options, - **options, - ), - reduce_func=lambda x: list(itertools.chain(*x)), - parallel_backend=parallel_backend, - n_jobs=n_jobs, + map_reduce_job: MapReduceJob["LeastCoreProblem", "List[ValuationResult]"] = ( + MapReduceJob( + inputs=problems, + map_func=_map_func, + map_kwargs=dict( + u=u, + algorithm=algorithm, + non_negative_subsidy=non_negative_subsidy, + solver_options=solver_options, + **options, + ), + reduce_func=lambda x: list(itertools.chain(*x)), + parallel_backend=parallel_backend, + n_jobs=n_jobs, + ) ) solutions = map_reduce_job() diff --git a/src/pydvl/value/oob/oob.py b/src/pydvl/value/oob/oob.py index 71e32ff2a..93c18de51 100644 --- a/src/pydvl/value/oob/oob.py +++ b/src/pydvl/value/oob/oob.py @@ -6,6 +6,7 @@ In: Published at ICML 2023 """ + from typing import Optional, TypeVar import numpy as np diff --git a/src/pydvl/value/result.py b/src/pydvl/value/result.py index 6a714e1bf..bc1f11b56 100644 --- a/src/pydvl/value/result.py +++ b/src/pydvl/value/result.py @@ -358,16 +358,13 @@ def __getattr__(self, attr: str) -> Any: ) from e @overload - def __getitem__(self, key: int) -> ValueItem: - ... + def __getitem__(self, key: int) -> ValueItem: ... @overload - def __getitem__(self, key: slice) -> List[ValueItem]: - ... + def __getitem__(self, key: slice) -> List[ValueItem]: ... @overload - def __getitem__(self, key: Iterable[int]) -> List[ValueItem]: - ... + def __getitem__(self, key: Iterable[int]) -> List[ValueItem]: ... def __getitem__( self, key: Union[slice, Iterable[int], int] @@ -393,16 +390,13 @@ def __getitem__( raise TypeError("Indices must be integers, iterable or slices") @overload - def __setitem__(self, key: int, value: ValueItem) -> None: - ... + def __setitem__(self, key: int, value: ValueItem) -> None: ... @overload - def __setitem__(self, key: slice, value: ValueItem) -> None: - ... + def __setitem__(self, key: slice, value: ValueItem) -> None: ... @overload - def __setitem__(self, key: Iterable[int], value: ValueItem) -> None: - ... + def __setitem__(self, key: Iterable[int], value: ValueItem) -> None: ... def __setitem__( self, key: Union[slice, Iterable[int], int], value: ValueItem @@ -579,7 +573,7 @@ def __add__( other_shared_names = np.take(other_names, both_pos) if np.any(this_shared_names != other_shared_names): - raise ValueError(f"Mismatching names in ValuationResults") + raise ValueError("Mismatching names in ValuationResults") names = np.empty_like(indices, dtype=self._names.dtype) names[this_pos] = self._names diff --git a/src/pydvl/value/sampler.py b/src/pydvl/value/sampler.py index 3b915f5b7..3ce8d4b13 100644 --- a/src/pydvl/value/sampler.py +++ b/src/pydvl/value/sampler.py @@ -185,12 +185,10 @@ def iterindices(self) -> Iterator[IndexT]: yield np.random.choice(self._outer_indices, size=1).item() @overload - def __getitem__(self, key: slice) -> PowersetSampler[IndexT]: - ... + def __getitem__(self, key: slice) -> PowersetSampler[IndexT]: ... @overload - def __getitem__(self, key: list[int]) -> PowersetSampler[IndexT]: - ... + def __getitem__(self, key: list[int]) -> PowersetSampler[IndexT]: ... def __getitem__(self, key: slice | list[int]) -> PowersetSampler[IndexT]: if isinstance(key, slice) or isinstance(key, Iterable): @@ -212,8 +210,7 @@ def __repr__(self): return f"{self.__class__.__name__}({self._indices}, {self._outer_indices})" @abc.abstractmethod - def __iter__(self) -> Iterator[SampleT]: - ... + def __iter__(self) -> Iterator[SampleT]: ... @classmethod @abc.abstractmethod diff --git a/src/pydvl/value/semivalues.py b/src/pydvl/value/semivalues.py index 46cd1f16e..65000cf15 100644 --- a/src/pydvl/value/semivalues.py +++ b/src/pydvl/value/semivalues.py @@ -85,6 +85,7 @@ [Data Banzhaf: A Robust Data Valuation Framework for Machine Learning](https://proceedings.mlr.press/v206/wang23e.html). In: Proceedings of The 26th International Conference on Artificial Intelligence and Statistics, pp. 6388-6421. """ + from __future__ import annotations import logging @@ -267,8 +268,8 @@ def __call__( Then, this processor computes marginals based on the utility value and the index set provided. The final formula that gives the Banzhaf semivalue using MSR is: - $$\hat{\phi}_{MSR}(i) = \frac{1}{|\mathbf{S}_{\ni i}|} \sum_{S \in \mathbf{S}_{\ni i}} U(S) - - \frac{1}{|\mathbf{S}_{\not{\ni} i}|} \sum_{S \in \mathbf{S}_{\not{\ni} i}} U(S)$$ + $$\\hat{\\phi}_{MSR}(i) = \frac{1}{|\\mathbf{S}_{\ni i}|} \\sum_{S \\in \\mathbf{S}_{\ni i}} U(S) + - \frac{1}{|\\mathbf{S}_{\not{\ni} i}|} \\sum_{S \\in \\mathbf{S}_{\not{\ni} i}} U(S)$$ Args: future_result: Result of the parallel computing jobs comprised of diff --git a/src/pydvl/value/shapley/classwise.py b/src/pydvl/value/shapley/classwise.py index f3982f81b..75aaa4ad5 100644 --- a/src/pydvl/value/shapley/classwise.py +++ b/src/pydvl/value/shapley/classwise.py @@ -41,9 +41,9 @@ $$y = \max(0, \min(1, \text{round}(\beta^T x)))$$ in closed form $\beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)}$. From the closed-form - solution, the tables for in-class accuracy $a_S(D_{y_i})$ and out-of-class accuracy - $a_S(D_{-y_i})$ can be calculated. By using these tables and setting - $\{S^{(1)}, \dots, S^{(K)}\} = 2^{T_{-y_i}}$ and + solution, the tables for in-class accuracy $a_S(D_{y_i})$ and out-of-class accuracy + $a_S(D_{-y_i})$ can be calculated. By using these tables and setting + $\{S^{(1)}, \dots, S^{(K)}\} = 2^{T_{-y_i}}$ and $\{\sigma^{(1)}, \dots, \sigma^{(L)}\} = \Pi(T_{y_i}\setminus\{i\})$, the Monte Carlo estimator can be evaluated ($2^M$ is the powerset of $M$). The details of the derivation are left to the eager reader. @@ -57,6 +57,7 @@ (NeurIPS). New Orleans, Louisiana, USA, 2022. """ + import logging import numbers from concurrent.futures import FIRST_COMPLETED, Future, wait diff --git a/src/pydvl/value/shapley/gt.py b/src/pydvl/value/shapley/gt.py index 81286a7a4..79754e3ec 100644 --- a/src/pydvl/value/shapley/gt.py +++ b/src/pydvl/value/shapley/gt.py @@ -264,7 +264,7 @@ def group_testing_shapley( samples_per_job = max(1, n_samples // parallel_backend.effective_n_jobs(n_jobs)) def reducer( - results_it: Iterable[Tuple[NDArray, NDArray]] + results_it: Iterable[Tuple[NDArray, NDArray]], ) -> Tuple[NDArray, NDArray]: return np.concatenate(list(x[0] for x in results_it)).astype( np.float64 diff --git a/src/pydvl/value/shapley/montecarlo.py b/src/pydvl/value/shapley/montecarlo.py index 5267ed430..1b3a8c520 100644 --- a/src/pydvl/value/shapley/montecarlo.py +++ b/src/pydvl/value/shapley/montecarlo.py @@ -40,6 +40,7 @@ In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242–2251. """ + from __future__ import annotations import logging @@ -47,7 +48,7 @@ import operator from concurrent.futures import FIRST_COMPLETED, Future, wait from functools import reduce -from typing import Optional, Sequence, Union, cast +from typing import Optional, Sequence, Union import numpy as np from deprecate import deprecated diff --git a/src/pydvl/value/shapley/owen.py b/src/pydvl/value/shapley/owen.py index 0f5c0e9f0..305eda36a 100644 --- a/src/pydvl/value/shapley/owen.py +++ b/src/pydvl/value/shapley/owen.py @@ -9,7 +9,7 @@ import operator from enum import Enum from functools import reduce -from typing import Optional, Sequence, cast +from typing import Optional, Sequence import numpy as np from deprecate import deprecated @@ -51,7 +51,7 @@ def _owen_sampling_shapley( *, progress: bool = False, job_id: int = 1, - seed: Optional[Seed] = None + seed: Optional[Seed] = None, ) -> ValuationResult: r"""This is the algorithm as detailed in the paper: to compute the outer integral over q ∈ [0,1], use uniformly distributed points for evaluation @@ -132,7 +132,7 @@ def owen_sampling_shapley( parallel_backend: Optional[ParallelBackend] = None, config: Optional[ParallelConfig] = None, progress: bool = False, - seed: Optional[Seed] = None + seed: Optional[Seed] = None, ) -> ValuationResult: r"""Owen sampling of Shapley values as described in (Okhrati and Lipani, 2021)1. diff --git a/src/pydvl/value/shapley/truncated.py b/src/pydvl/value/shapley/truncated.py index d62f625fa..11a86f983 100644 --- a/src/pydvl/value/shapley/truncated.py +++ b/src/pydvl/value/shapley/truncated.py @@ -6,16 +6,14 @@ In: Proceedings of the 36th International Conference on Machine Learning, PMLR, pp. 2242–2251. """ + import abc import logging -from typing import Optional, cast +from typing import Optional import numpy as np -from pydvl.parallel.config import ParallelConfig from pydvl.utils import Utility, running_moments -from pydvl.value import ValuationResult -from pydvl.value.stopping import MaxChecks, StoppingCriterion __all__ = [ "TruncationPolicy", diff --git a/src/pydvl/value/stopping.py b/src/pydvl/value/stopping.py index 328e8b556..512ab8701 100644 --- a/src/pydvl/value/stopping.py +++ b/src/pydvl/value/stopping.py @@ -152,8 +152,7 @@ class StoppingCriterionCallable(Protocol): """Signature for a stopping criterion""" - def __call__(self, result: ValuationResult) -> Status: - ... + def __call__(self, result: ValuationResult) -> Status: ... class StoppingCriterion(abc.ABC): diff --git a/tests/conftest.py b/tests/conftest.py index f8e3bc51d..59dd7d865 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,7 @@ import os import platform from dataclasses import asdict -from typing import TYPE_CHECKING, Optional, Tuple +from typing import Optional, Tuple import numpy as np import pytest @@ -102,7 +102,7 @@ def memcached_service(request) -> Tuple[str, int]: @pytest.fixture(scope="function") -def memcache_client_config(memcached_service) -> "MemcachedClientConfig": +def memcache_client_config(memcached_service) -> "MemcachedClientConfig": # noqa: F821 from pydvl.utils import MemcachedClientConfig return MemcachedClientConfig( @@ -113,11 +113,9 @@ def memcache_client_config(memcached_service) -> "MemcachedClientConfig": @pytest.fixture(scope="function") def memcached_client( memcache_client_config, -) -> Tuple["Client", "MemcachedClientConfig"]: +) -> Tuple["Client", "MemcachedClientConfig"]: # noqa: F821 from pymemcache.client import Client - from pydvl.utils import MemcachedClientConfig - try: c = Client(**asdict(memcache_client_config)) c.flush_all() diff --git a/tests/influence/test_influence_calculator.py b/tests/influence/test_influence_calculator.py index bfd976e2a..2ef667b76 100644 --- a/tests/influence/test_influence_calculator.py +++ b/tests/influence/test_influence_calculator.py @@ -31,7 +31,6 @@ TorchNumpyConverter, ) from pydvl.influence.types import UnsupportedInfluenceModeException -from tests.influence.torch.test_influence_model import model_and_data, test_case from tests.influence.torch.test_util import are_active_layers_linear diff --git a/tests/influence/torch/test_batch_operation.py b/tests/influence/torch/test_batch_operation.py index b04f9b19b..20141c8a4 100644 --- a/tests/influence/torch/test_batch_operation.py +++ b/tests/influence/torch/test_batch_operation.py @@ -11,7 +11,7 @@ ) from pydvl.influence.torch.util import align_structure, flatten_dimensions -from .test_util import model_data, test_parameters, torch +from .test_util import test_parameters, torch # noqa: F811 @pytest.mark.torch diff --git a/tests/influence/torch/test_functional.py b/tests/influence/torch/test_functional.py index 5c7b90b50..38703991e 100644 --- a/tests/influence/torch/test_functional.py +++ b/tests/influence/torch/test_functional.py @@ -9,7 +9,7 @@ torch = pytest.importorskip("torch") import numpy as np -import torch +import torch # noqa: F811 from torch.nn.functional import mse_loss from torch.utils.data import DataLoader, TensorDataset @@ -27,7 +27,7 @@ from pydvl.influence.torch.util import align_structure, flatten_dimensions from .conftest import DATA_OUTPUT_NOISE, linear_mvp_model -from .test_util import model_data, test_parameters +from .test_util import test_parameters @pytest.mark.torch @@ -212,20 +212,20 @@ def mat_vec(x): # Parameters input_type = torch.float32 - mat_vec_device = torch.device("cpu") + # mat_vec_device = torch.device("cpu") # Call the function under test result = randomized_nystroem_approximation(mat_vec, dim, rank, input_type) # Check if the result is an instance of LowRankProductRepresentation - assert isinstance( - result, LowRankProductRepresentation - ), "Result should be an instance of LowRankProductRepresentation" + assert isinstance(result, LowRankProductRepresentation), ( + "Result should be an instance of LowRankProductRepresentation" + ) # Reconstruct the approximation of A from the result U, Sigma = result.projections, result.eigen_vals A_approx = torch.matmul(U, U.t() * Sigma.unsqueeze(-1)) # Verify that the approximation is close to the original A - assert torch.allclose( - A, A_approx, atol=1e-5, rtol=1e-3 - ), "The approximation should be close to the original matrix within a tolerance" + assert torch.allclose(A, A_approx, atol=1e-5, rtol=1e-3), ( + "The approximation should be close to the original matrix within a tolerance" + ) diff --git a/tests/influence/torch/test_influence_model.py b/tests/influence/torch/test_influence_model.py index 929fc286a..b802eeef2 100644 --- a/tests/influence/torch/test_influence_model.py +++ b/tests/influence/torch/test_influence_model.py @@ -30,7 +30,7 @@ torch = pytest.importorskip("torch") -import torch +import torch # noqa: F811 import torch.nn.functional as F from pytest_cases import fixture, parametrize, parametrize_with_cases from torch import nn diff --git a/tests/influence/torch/test_operator.py b/tests/influence/torch/test_operator.py index 67b45a26b..a1d8f4de3 100644 --- a/tests/influence/torch/test_operator.py +++ b/tests/influence/torch/test_operator.py @@ -6,7 +6,7 @@ from pydvl.influence.torch.operator import HessianOperator -from .test_util import model_data, test_parameters +from .test_util import test_parameters @pytest.mark.torch diff --git a/tests/influence/torch/test_util.py b/tests/influence/torch/test_util.py index e0610dd79..16e4feeb6 100644 --- a/tests/influence/torch/test_util.py +++ b/tests/influence/torch/test_util.py @@ -7,7 +7,7 @@ from pydvl.influence.torch.operator import MatrixOperator torch = pytest.importorskip("torch") -import torch.nn +import torch.nn # noqa: F811 from numpy.typing import NDArray from scipy.stats import pearsonr, spearmanr from torch.nn.functional import mse_loss diff --git a/tests/parallel/test_parallel.py b/tests/parallel/test_parallel.py index d79a97d52..894bc176b 100644 --- a/tests/parallel/test_parallel.py +++ b/tests/parallel/test_parallel.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pydvl.parallel import MapReduceJob, RayParallelBackend, init_parallel_backend +from pydvl.parallel import MapReduceJob, RayParallelBackend from pydvl.utils.types import Seed from ..conftest import num_workers diff --git a/tests/test_results.py b/tests/test_results.py index c7cd7948f..76ba578e8 100644 --- a/tests/test_results.py +++ b/tests/test_results.py @@ -50,7 +50,7 @@ def test_sorting(values, names, ranks_asc, dummy_values): def test_dataframe_sorting(values, names, ranks_asc, dummy_values): sorted_names = [names[r] for r in ranks_asc] try: - import pandas + import pandas # noqa: F401 df = dummy_values.to_dataframe(use_names=False) assert all(df.index.values == ranks_asc) diff --git a/tests/utils/test_dataset.py b/tests/utils/test_dataset.py index c522b587f..680b15167 100644 --- a/tests/utils/test_dataset.py +++ b/tests/utils/test_dataset.py @@ -20,8 +20,7 @@ def test_creating_dataset_from_sklearn(train_size): def test_creating_dataset_subsclassfrom_sklearn(train_size): data = load_wine() - class TestDataset(Dataset): - ... + class TestDataset(Dataset): ... dataset = TestDataset.from_sklearn(data, train_size=train_size) assert isinstance(dataset, TestDataset) @@ -61,8 +60,7 @@ def test_creating_grouped_dataset_from_sklearn_failure(train_size): def test_creating_grouped_dataset_subsclassfrom_sklearn(train_size): data = load_wine() - class TestGroupedDataset(GroupedDataset): - ... + class TestGroupedDataset(GroupedDataset): ... data_groups = np.random.randint(low=0, high=3, size=len(data.data)).flatten() n_groups = len(np.unique(data_groups)) diff --git a/tests/utils/test_numeric.py b/tests/utils/test_numeric.py index bbb2a4129..acda36f6a 100644 --- a/tests/utils/test_numeric.py +++ b/tests/utils/test_numeric.py @@ -32,9 +32,7 @@ def test_powerset(): assert all([np.math.comb(n, j) for j in range(n + 1)] == size_counts) -@pytest.mark.parametrize( - "n, max_subsets", [(1, 10), (10, 2**10), (5, 2**7), (0, 1)] -) +@pytest.mark.parametrize("n, max_subsets", [(1, 10), (10, 2**10), (5, 2**7), (0, 1)]) @pytest.mark.parametrize("q", [0.0, 0.1, 0.26, 0.49, 0.5, 0.6, 1]) def test_random_powerset(n, max_subsets, q): """Tests frequency of items in sets and frequencies of set sizes. @@ -144,7 +142,7 @@ def test_random_subset_of_size(n, size, exception): "n, size", [(10, 3), (1000, 40)], ) -def test_random_subset_of_size_stochastic(n, size, seed, seed_alt): +def test_random_subset_of_size_stochastic_unequal(n, size, seed, seed_alt): """ Test that the same seeds produce the same results, and different seeds produce different results for method :func:`random_subset_of_size`. @@ -159,7 +157,7 @@ def test_random_subset_of_size_stochastic(n, size, seed, seed_alt): "n, size", [(10, 3), (1000, 40)], ) -def test_random_subset_of_size_stochastic(n, size, seed): +def test_random_subset_of_size_stochastic_equal(n, size, seed): """ Test that the same seeds produce the same results, and different seeds produce different results for method :func:`random_subset_of_size`. diff --git a/tests/utils/test_utility.py b/tests/utils/test_utility.py index 335b0c136..6be0f5f31 100644 --- a/tests/utils/test_utility.py +++ b/tests/utils/test_utility.py @@ -134,7 +134,7 @@ def test_utility_serialization(linear_dataset, use_cache): cache_backend=cache, ) u_unpickled = pickle.loads(pickle.dumps(u)) - assert type(u.model) == type(u_unpickled.model) - assert type(u.scorer) == type(u_unpickled.scorer) - assert type(u.data) == type(u_unpickled.data) + assert type(u.model) is type(u_unpickled.model) + assert type(u.scorer) is type(u_unpickled.scorer) + assert type(u.data) is type(u_unpickled.data) assert (u.data.x_train == u_unpickled.data.x_train).all() diff --git a/tests/valuation/methods/test_deterministic_shapley_valuations.py b/tests/valuation/methods/test_deterministic_shapley_valuations.py index 5e0b16ff2..5de7cb2c2 100644 --- a/tests/valuation/methods/test_deterministic_shapley_valuations.py +++ b/tests/valuation/methods/test_deterministic_shapley_valuations.py @@ -1,4 +1,5 @@ """Test the deterministic Shapley valuation methods (combinatorial and permutation).""" + import logging import numpy as np diff --git a/tests/valuation/methods/test_montecarlo_shapley_valuations.py b/tests/valuation/methods/test_montecarlo_shapley_valuations.py index f3b8eab6f..363ebfb64 100644 --- a/tests/valuation/methods/test_montecarlo_shapley_valuations.py +++ b/tests/valuation/methods/test_montecarlo_shapley_valuations.py @@ -241,7 +241,8 @@ def test_hoeffding_bound_montecarlo( @pytest.mark.slow @pytest.mark.parametrize( - "a, b, num_points", [(2, 0, 21)] # training set will have 0.3 * 21 ~= 6 samples + "a, b, num_points", + [(2, 0, 21)], # training set will have 0.3 * 21 ~= 6 samples ) @pytest.mark.parametrize( "sampler_class, sampler_kwargs, valuation_class, valuation_kwargs", @@ -329,7 +330,8 @@ def test_linear_montecarlo_with_outlier( @pytest.mark.parametrize( - "a, b, num_points, num_groups", [(2, 0, 21, 2)] # 24*0.3=6 samples in 2 groups + "a, b, num_points, num_groups", + [(2, 0, 21, 2)], # 24*0.3=6 samples in 2 groups ) @pytest.mark.parametrize( "sampler_class, kwargs", diff --git a/tests/valuation/methods/test_semivalues.py b/tests/valuation/methods/test_semivalues.py index 4e409f1fd..93c989cbc 100644 --- a/tests/valuation/methods/test_semivalues.py +++ b/tests/valuation/methods/test_semivalues.py @@ -10,9 +10,7 @@ BetaShapleyValuation, DataBanzhafValuation, DataShapleyValuation, - DeltaShapleyValuation, MSRBanzhafValuation, - SemivalueValuation, ) from pydvl.valuation.samplers import ( AntitheticPermutationSampler, diff --git a/tests/valuation/methods/test_solve_least_core_problems.py b/tests/valuation/methods/test_solve_least_core_problems.py index 059c275c4..26d0e00b0 100644 --- a/tests/valuation/methods/test_solve_least_core_problems.py +++ b/tests/valuation/methods/test_solve_least_core_problems.py @@ -12,7 +12,6 @@ indirect=True, ) def test_lc_solve_problems(test_game): - test_game.u = test_game.u.with_dataset(test_game.data) problem = test_game.least_core_problem() diff --git a/tests/valuation/samplers/__init__.py b/tests/valuation/samplers/__init__.py index 3c9a24ad7..55e95184a 100644 --- a/tests/valuation/samplers/__init__.py +++ b/tests/valuation/samplers/__init__.py @@ -22,9 +22,9 @@ def _check_subsets(batches, expected): def _check_classwise_batches( batches: list[list[ClasswiseSample]], expected_batches: list[list[ClasswiseSample]] ) -> None: - assert len(batches) == len( - expected_batches - ), f"{len(batches)=} != {len(expected_batches)=}" + assert len(batches) == len(expected_batches), ( + f"{len(batches)=} != {len(expected_batches)=}" + ) for batch, expected_batch in zip(batches, expected_batches): for sample, expected_sample in zip(batch, expected_batch): assert_array_equal(sample.subset, expected_sample.subset) diff --git a/tests/valuation/test_interface.py b/tests/valuation/test_interface.py index ddde82560..f5759c676 100644 --- a/tests/valuation/test_interface.py +++ b/tests/valuation/test_interface.py @@ -1,7 +1,5 @@ -"""Simple test for the public user interface documented in turorials.""" +"""Simple test for the public user interface documented in tutorials.""" - -import logging import os from contextlib import contextmanager diff --git a/tests/valuation/test_result.py b/tests/valuation/test_result.py index 844e39eed..e30bdd471 100644 --- a/tests/valuation/test_result.py +++ b/tests/valuation/test_result.py @@ -52,7 +52,7 @@ def test_sorting(values, names, ranks_asc, dummy_values): def test_dataframe_sorting(values, names, ranks_asc, dummy_values): sorted_names = [names[r] for r in ranks_asc] try: - import pandas + import pandas # noqa: F401 df = dummy_values.to_dataframe(use_names=False) assert np.alltrue(df.index.values == ranks_asc) diff --git a/tests/valuation/utils.py b/tests/valuation/utils.py index c55ab13eb..dcf863651 100644 --- a/tests/valuation/utils.py +++ b/tests/valuation/utils.py @@ -4,7 +4,7 @@ from copy import deepcopy from functools import wraps from logging import getLogger -from typing import Callable, Optional, Protocol, Tuple, TypeVar +from typing import Callable, Protocol, Tuple, TypeVar from pydvl.utils.types import Seed @@ -35,8 +35,7 @@ class TimedCallable(Protocol): execution_time: float - def __call__(self, *args, **kwargs) -> ReturnT: - ... + def __call__(self, *args, **kwargs) -> ReturnT: ... def timed(fun: Callable[..., ReturnT]) -> TimedCallable: diff --git a/tests/value/conftest.py b/tests/value/conftest.py index 1e3dad94e..a0841dc1e 100644 --- a/tests/value/conftest.py +++ b/tests/value/conftest.py @@ -8,7 +8,6 @@ from sklearn.utils import Bunch from pydvl.parallel import JoblibParallelBackend -from pydvl.parallel.config import ParallelConfig from pydvl.utils import Dataset, SupervisedModel, Utility from pydvl.utils.caching import InMemoryCacheBackend from pydvl.utils.status import Status diff --git a/tests/value/shapley/test_classwise.py b/tests/value/shapley/test_classwise.py index ec8864afd..790d1b4c1 100644 --- a/tests/value/shapley/test_classwise.py +++ b/tests/value/shapley/test_classwise.py @@ -12,9 +12,8 @@ from sklearn.linear_model import LogisticRegression from pydvl.utils import Dataset as OldDataset -from pydvl.utils import Utility +from pydvl.utils import Utility, powerset from pydvl.utils import Utility as OldUtility -from pydvl.utils import powerset from pydvl.valuation import ( ClasswiseModelUtility, ClasswiseSampler, @@ -115,9 +114,9 @@ def classwise_shapley_exact_solution_no_default() -> Tuple[Dict, ValuationResult @pytest.fixture(scope="function") -def classwise_shapley_exact_solution_no_default_allow_empty_set() -> ( - Tuple[Dict, ValuationResult, Dict] -): +def classwise_shapley_exact_solution_no_default_allow_empty_set() -> Tuple[ + Dict, ValuationResult, Dict +]: r""" Note that this special case doesn't set the utility to 0 if the permutation is empty and additionally allows $S^{(k)} = \emptyset$. See diff --git a/tests/value/shapley/test_montecarlo.py b/tests/value/shapley/test_montecarlo.py index 0821b29cf..5317e28e6 100644 --- a/tests/value/shapley/test_montecarlo.py +++ b/tests/value/shapley/test_montecarlo.py @@ -82,7 +82,7 @@ def test_games( parallel_backend=parallel_backend, seed=seed, progress=True, - **kwargs + **kwargs, ) exact_values = test_game.shapley_values() @@ -124,7 +124,7 @@ def test_seed( n_jobs=n_jobs, parallel_backend=parallel_backend, seeds=(seed, seed, seed_alt), - **deepcopy(kwargs) + **deepcopy(kwargs), ) np.testing.assert_equal(values_1.values, values_2.values) with pytest.raises(AssertionError): @@ -168,7 +168,8 @@ def test_hoeffding_bound_montecarlo( @pytest.mark.slow @pytest.mark.parametrize( - "a, b, num_points", [(2, 0, 21)] # training set will have 0.3 * 21 ~= 6 samples + "a, b, num_points", + [(2, 0, 21)], # training set will have 0.3 * 21 ~= 6 samples ) @pytest.mark.parametrize("scorer, total_atol", [(squashed_r2, 0.2)]) @pytest.mark.parametrize( @@ -222,7 +223,8 @@ def test_linear_montecarlo_with_outlier( @pytest.mark.parametrize( - "a, b, num_points, num_groups", [(2, 0, 21, 2)] # 24*0.3=6 samples in 2 groups + "a, b, num_points, num_groups", + [(2, 0, 21, 2)], # 24*0.3=6 samples in 2 groups ) @pytest.mark.parametrize("scorer, rtol", [(squashed_r2, 0.1)]) @pytest.mark.parametrize( diff --git a/tests/value/shapley/test_truncated.py b/tests/value/shapley/test_truncated.py index d725393ed..b8aa7613a 100644 --- a/tests/value/shapley/test_truncated.py +++ b/tests/value/shapley/test_truncated.py @@ -62,7 +62,8 @@ def test_games( @pytest.mark.parametrize( - "a, b, num_points", [(2, 0, 21)] # training set will have 0.3 * 21 ~= 6 samples + "a, b, num_points", + [(2, 0, 21)], # training set will have 0.3 * 21 ~= 6 samples ) @pytest.mark.parametrize("scorer, total_atol", [(squashed_r2, 0.2)]) @pytest.mark.parametrize( diff --git a/tests/value/utils.py b/tests/value/utils.py index c55ab13eb..dcf863651 100644 --- a/tests/value/utils.py +++ b/tests/value/utils.py @@ -4,7 +4,7 @@ from copy import deepcopy from functools import wraps from logging import getLogger -from typing import Callable, Optional, Protocol, Tuple, TypeVar +from typing import Callable, Protocol, Tuple, TypeVar from pydvl.utils.types import Seed @@ -35,8 +35,7 @@ class TimedCallable(Protocol): execution_time: float - def __call__(self, *args, **kwargs) -> ReturnT: - ... + def __call__(self, *args, **kwargs) -> ReturnT: ... def timed(fun: Callable[..., ReturnT]) -> TimedCallable: diff --git a/tox.ini b/tox.ini index d27d9a949..4072c5687 100644 --- a/tox.ini +++ b/tox.ini @@ -32,11 +32,9 @@ commands = [testenv:linting] skip_install = true -setenv = - PYLINTHOME = .pylint.d commands = pre-commit run --all --show-diff-on-failure - bash -c \'python build_scripts/run_pylint.py >>>(pylint-json2html -f jsonextended -o pylint.html) \' + ruff check src/ --fix deps = -r requirements-linting.txt -r requirements.txt From fb65b60dcd030ee68d465beeb78e17fff019fbc1 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 12:28:33 +0100 Subject: [PATCH 05/21] Fix lint job --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index bed9fa95b..18a549378 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -36,7 +36,7 @@ jobs: - name: Lint Code run: | pre-commit run --all --show-diff-on-failure - python build_scripts/run_pylint.py | (pylint-json2html -f jsonextended -o pylint.html) + ruff check src/ --fix shell: bash - name: Generate mypy cache key id: generate-mypy-cache-key From 4d6afb537c88e5a0d0d10db5ced64caf2ff012bd Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 12:58:49 +0100 Subject: [PATCH 06/21] Change np.float_ references to np.float64 --- notebooks/least_core_basic_new.ipynb | 27 ++++-------------- requirements-type-checking.txt | 4 +-- src/pydvl/influence/influence_calculator.py | 1 + .../methods/_solve_least_core_problems.py | 28 +++++++++---------- .../_utility_values_and_sample_masks.py | 2 +- src/pydvl/valuation/methods/gt_shapley.py | 4 +-- src/pydvl/valuation/result.py | 24 ++++++++-------- src/pydvl/valuation/scorers/base.py | 2 +- src/pydvl/valuation/stopping.py | 2 +- src/pydvl/value/least_core/common.py | 2 +- src/pydvl/value/shapley/gt.py | 4 +-- tests/valuation/test_result.py | 4 +-- 12 files changed, 45 insertions(+), 59 deletions(-) diff --git a/notebooks/least_core_basic_new.ipynb b/notebooks/least_core_basic_new.ipynb index 1caacac90..f894cb084 100644 --- a/notebooks/least_core_basic_new.ipynb +++ b/notebooks/least_core_basic_new.ipynb @@ -463,23 +463,15 @@ { "cell_type": "code", "execution_count": null, - "id": "985c27e0", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [ - "hide-output" - ] - }, + "id": "4f9b7b6eb4043dc7", + "metadata": {}, "outputs": [], "source": [ "def compute_removal_score(\n", " u: ModelUtility,\n", " values: ValuationResult,\n", " training_data: Dataset,\n", - " percentages: NDArray[np.float_] | Iterable[float],\n", + " percentages: NDArray[np.float64] | Iterable[float],\n", " *,\n", " remove_best: bool = False,\n", " progress: bool = False,\n", @@ -551,17 +543,8 @@ { "cell_type": "code", "execution_count": null, - "id": "1f95fb06", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [ - "hide-input", - "invertible-output" - ] - }, + "id": "fbfc70f6d7d4f2e4", + "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", diff --git a/requirements-type-checking.txt b/requirements-type-checking.txt index 8d699f2d9..805e73061 100644 --- a/requirements-type-checking.txt +++ b/requirements-type-checking.txt @@ -1,3 +1,3 @@ -mypy==1.5.1 +mypy==1.14 types-tqdm -pandas-stubs \ No newline at end of file +pandas-stubs diff --git a/src/pydvl/influence/influence_calculator.py b/src/pydvl/influence/influence_calculator.py index badda0c80..8f360133f 100644 --- a/src/pydvl/influence/influence_calculator.py +++ b/src/pydvl/influence/influence_calculator.py @@ -357,6 +357,7 @@ def influences( self._validate_dimensions_not_chunked(y) else: x, y = x_test, y_test + assert x is not None and y is not None # For the type checker's benefit def func( x_test_numpy: NDArray, diff --git a/src/pydvl/valuation/methods/_solve_least_core_problems.py b/src/pydvl/valuation/methods/_solve_least_core_problems.py index b491112ee..375e85fe1 100644 --- a/src/pydvl/valuation/methods/_solve_least_core_problems.py +++ b/src/pydvl/valuation/methods/_solve_least_core_problems.py @@ -31,8 +31,8 @@ class LeastCoreProblem(NamedTuple): """ - utility_values: NDArray[np.float_] - A_lb: NDArray[np.float_] + utility_values: NDArray[np.float64] + A_lb: NDArray[np.float64] def lc_solve_problem( @@ -128,7 +128,7 @@ def lc_solve_problem( solver_options=solver_options, ) - values: NDArray[np.float_] | None + values: NDArray[np.float64] | None if subsidy is None: logger.debug("No values were found") @@ -166,13 +166,13 @@ def lc_solve_problem( def _solve_least_core_linear_program( - A_eq: NDArray[np.float_], - b_eq: NDArray[np.float_], - A_lb: NDArray[np.float_], - b_lb: NDArray[np.float_], + A_eq: NDArray[np.float64], + b_eq: NDArray[np.float64], + A_lb: NDArray[np.float64], + b_lb: NDArray[np.float64], solver_options: dict, non_negative_subsidy: bool = False, -) -> Tuple[NDArray[np.float_] | None, float | None]: +) -> Tuple[NDArray[np.float64] | None, float | None]: r"""Solves the Least Core's linear program using cvxopt. $$ @@ -231,7 +231,7 @@ def _solve_least_core_linear_program( "maximum number of iterations in solver_options", RuntimeWarning, ) - subsidy = cast(NDArray[np.float_], e.value).item() + subsidy = cast(NDArray[np.float64], e.value).item() return x.value, subsidy if problem.status in cp.settings.INF_OR_UNB: @@ -244,12 +244,12 @@ def _solve_least_core_linear_program( def _solve_egalitarian_least_core_quadratic_program( subsidy: float, - A_eq: NDArray[np.float_], - b_eq: NDArray[np.float_], - A_lb: NDArray[np.float_], - b_lb: NDArray[np.float_], + A_eq: NDArray[np.float64], + b_eq: NDArray[np.float64], + A_lb: NDArray[np.float64], + b_lb: NDArray[np.float64], solver_options: dict, -) -> NDArray[np.float_] | None: +) -> NDArray[np.float64] | None: r"""Solves the egalitarian Least Core's quadratic program using cvxopt. $$ diff --git a/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py b/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py index 91215c4d6..0d52b702d 100644 --- a/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py +++ b/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py @@ -23,7 +23,7 @@ def compute_utility_values_and_sample_masks( n_samples: int, progress: bool, extra_samples: Iterable[SampleT] | None = None, -) -> Tuple[NDArray[np.float_], NDArray[np.bool_]]: +) -> Tuple[NDArray[np.float64], NDArray[np.bool_]]: """Calculate utility values and sample masks on samples in parallel. Creating the utility evaluations and sample masks is the computational bottleneck diff --git a/src/pydvl/valuation/methods/gt_shapley.py b/src/pydvl/valuation/methods/gt_shapley.py index 83749ea4a..df0e04f31 100644 --- a/src/pydvl/valuation/methods/gt_shapley.py +++ b/src/pydvl/valuation/methods/gt_shapley.py @@ -350,11 +350,11 @@ def solve_group_testing_problem( values = ( np.nan * np.ones_like(n_obs) if not hasattr(v.value, "__len__") - else cast(NDArray[np.float_], v.value) + else cast(NDArray[np.float64], v.value) ) status = Status.Failed else: - values = cast(NDArray[np.float_], v.value) + values = cast(NDArray[np.float64], v.value) status = Status.Converged result = ValuationResult( diff --git a/src/pydvl/valuation/result.py b/src/pydvl/valuation/result.py index 19ff17437..bde8e1bcc 100644 --- a/src/pydvl/valuation/result.py +++ b/src/pydvl/valuation/result.py @@ -205,9 +205,9 @@ class ValuationResult(collections.abc.Sequence, Iterable[ValueItem]): """ _indices: NDArray[IndexT] - _values: NDArray[np.float_] + _values: NDArray[np.float64] _counts: NDArray[np.int_] - _variances: NDArray[np.float_] + _variances: NDArray[np.float64] _data: Dataset _names: NDArray[NameT] _algorithm: str @@ -219,8 +219,8 @@ class ValuationResult(collections.abc.Sequence, Iterable[ValueItem]): def __init__( self, *, - values: Sequence[np.float_] | NDArray[np.float_], - variances: Sequence[np.float_] | NDArray[np.float_] | None = None, + values: Sequence[np.float64] | NDArray[np.float64], + variances: Sequence[np.float64] | NDArray[np.float64] | None = None, counts: Sequence[np.int_] | NDArray[np.int_] | None = None, indices: Sequence[IndexT] | NDArray[IndexT] | None = None, data_names: Sequence[NameT] | NDArray[NameT] | None = None, @@ -311,12 +311,12 @@ def sort( self._sort_order = reverse @property - def values(self) -> NDArray[np.float_]: + def values(self) -> NDArray[np.float64]: """The values, possibly sorted.""" return self._values[self._sort_positions] @property - def variances(self) -> NDArray[np.float_]: + def variances(self) -> NDArray[np.float64]: """Variances of the marginals from which values were computed, possibly sorted. Note that this is not the variance of the value estimate, but the sample @@ -326,10 +326,10 @@ def variances(self) -> NDArray[np.float_]: return self._variances[self._sort_positions] @property - def stderr(self) -> NDArray[np.float_]: + def stderr(self) -> NDArray[np.float64]: """Standard errors of the value estimates, possibly sorted.""" return cast( - NDArray[np.float_], np.sqrt(self._variances / np.maximum(1, self.counts)) + NDArray[np.float64], np.sqrt(self._variances / np.maximum(1, self.counts)) ) @property @@ -686,9 +686,11 @@ def to_dataframe( column = column or self._algorithm df = pd.DataFrame( self._values[self._sort_positions], - index=self._names[self._sort_positions] - if use_names - else self._indices[self._sort_positions], + index=( + self._names[self._sort_positions] + if use_names + else self._indices[self._sort_positions] + ), columns=[column], ) df[column + "_stderr"] = self.stderr[self._sort_positions] diff --git a/src/pydvl/valuation/scorers/base.py b/src/pydvl/valuation/scorers/base.py index a8c9162ba..ac62633a8 100644 --- a/src/pydvl/valuation/scorers/base.py +++ b/src/pydvl/valuation/scorers/base.py @@ -26,7 +26,7 @@ class Scorer(ABC): default: float name: str - range: NDArray[np.float_] + range: NDArray[np.float64] @abstractmethod def __call__(self, model) -> float: ... diff --git a/src/pydvl/valuation/stopping.py b/src/pydvl/valuation/stopping.py index 063ada439..1c78d3b54 100644 --- a/src/pydvl/valuation/stopping.py +++ b/src/pydvl/valuation/stopping.py @@ -585,7 +585,7 @@ class HistoryDeviation(StoppingCriterion): pin_converged: If `True`, once an index has converged, it is pinned """ - _memory: NDArray[np.float_] + _memory: NDArray[np.float64] def __init__( self, diff --git a/src/pydvl/value/least_core/common.py b/src/pydvl/value/least_core/common.py index 5b162f9b7..69c5e48e9 100644 --- a/src/pydvl/value/least_core/common.py +++ b/src/pydvl/value/least_core/common.py @@ -286,7 +286,7 @@ def _solve_least_core_linear_program( "maximum number of iterations in solver_options", RuntimeWarning, ) - subsidy = cast(NDArray[np.float_], e.value).item() + subsidy = cast(NDArray[np.float64], e.value).item() return x.value, subsidy if problem.status in cp.settings.INF_OR_UNB: diff --git a/src/pydvl/value/shapley/gt.py b/src/pydvl/value/shapley/gt.py index 79754e3ec..1b9b46682 100644 --- a/src/pydvl/value/shapley/gt.py +++ b/src/pydvl/value/shapley/gt.py @@ -310,11 +310,11 @@ def reducer( values = ( np.nan * np.ones_like(u.data.indices) if not hasattr(v.value, "__len__") - else cast(NDArray[np.float_], v.value) + else cast(NDArray[np.float64], v.value) ) status = Status.Failed else: - values = cast(NDArray[np.float_], v.value) + values = cast(NDArray[np.float64], v.value) status = Status.Converged return ValuationResult( diff --git a/tests/valuation/test_result.py b/tests/valuation/test_result.py index e30bdd471..c14dc0a78 100644 --- a/tests/valuation/test_result.py +++ b/tests/valuation/test_result.py @@ -385,7 +385,7 @@ def test_adding_different_indices( [ ([0, 1, 2], np.int64, ["a", "b", "c"], " Date: Sun, 12 Jan 2025 13:03:02 +0100 Subject: [PATCH 07/21] Drop old deps --- requirements-dev.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index b28caa90a..a43bc1a1d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,5 @@ tox<4.0.0 tox-wheel -black[jupyter] == 24.3.0 -isort == 5.12.0 -r requirements-linting.txt -r requirements-type-checking.txt bump2version==1.0.1 From a597b9b45993dc9b632e09fbc04fbb0ffd0c87e9 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 13:33:57 +0100 Subject: [PATCH 08/21] Fix some circular imports --- src/pydvl/value/least_core/common.py | 2 +- tests/influence/torch/conftest.py | 37 ++++++++++++++++++++++++++++ tests/influence/torch/test_util.py | 37 ---------------------------- tests/value/__init__.py | 2 +- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/pydvl/value/least_core/common.py b/src/pydvl/value/least_core/common.py index 69c5e48e9..9f23d6989 100644 --- a/src/pydvl/value/least_core/common.py +++ b/src/pydvl/value/least_core/common.py @@ -15,7 +15,7 @@ _maybe_init_parallel_backend, ) from pydvl.utils import Status, Utility -from pydvl.value import ValuationResult +from pydvl.value.result import ValuationResult __all__ = [ "_solve_least_core_linear_program", diff --git a/tests/influence/torch/conftest.py b/tests/influence/torch/conftest.py index 37459f1cc..f6a119c0a 100644 --- a/tests/influence/torch/conftest.py +++ b/tests/influence/torch/conftest.py @@ -6,6 +6,9 @@ from torch.optim import LBFGS from torch.utils.data import DataLoader +from pydvl.influence.torch.util import flatten_dimensions +from tests.influence.conftest import linear_hessian_analytical, linear_model + DATA_OUTPUT_NOISE: float = 0.01 @@ -71,3 +74,37 @@ def device(request): return torch.device("cuda") else: return torch.device("cpu") + + +def linear_torch_model_from_numpy(A: NDArray, b: NDArray) -> torch.nn.Module: + """ + Given numpy arrays representing the model $xA^t + b$, the function returns the corresponding torch model + :param A: + :param b: + :return: + """ + output_dimension, input_dimension = tuple(A.shape) + model = torch.nn.Linear(input_dimension, output_dimension) + model.eval() + model.weight.data = torch.as_tensor(A, dtype=torch.get_default_dtype()) + model.bias.data = torch.as_tensor(b, dtype=torch.get_default_dtype()) + return model + + +@pytest.fixture +def model_data(request): + dimension, condition_number, train_size = request.param + A, b = linear_model(dimension, condition_number) + x = torch.rand(train_size, dimension[-1]) + y = torch.rand(train_size, dimension[0]) + torch_model = linear_torch_model_from_numpy(A, b) + vec = flatten_dimensions( + tuple( + torch.rand(*p.shape) + for name, p in torch_model.named_parameters() + if p.requires_grad + ) + ) + H_analytical = linear_hessian_analytical((A, b), x.numpy()) + H_analytical = torch.as_tensor(H_analytical) + return torch_model, x, y, vec, H_analytical.to(torch.float32) diff --git a/tests/influence/torch/test_util.py b/tests/influence/torch/test_util.py index 16e4feeb6..3a329e37d 100644 --- a/tests/influence/torch/test_util.py +++ b/tests/influence/torch/test_util.py @@ -8,7 +8,6 @@ torch = pytest.importorskip("torch") import torch.nn # noqa: F811 -from numpy.typing import NDArray from scipy.stats import pearsonr, spearmanr from torch.nn.functional import mse_loss from torch.utils.data import DataLoader, TensorDataset @@ -24,12 +23,10 @@ TorchLinalgEighException, TorchTensorContainerType, align_structure, - flatten_dimensions, safe_torch_linalg_eigh, torch_dataset_to_dask_array, ) from tests.conftest import is_osx_arm64 -from tests.influence.conftest import linear_hessian_analytical, linear_model @dataclass @@ -85,40 +82,6 @@ class UtilTestParameters: ] -def linear_torch_model_from_numpy(A: NDArray, b: NDArray) -> torch.nn.Module: - """ - Given numpy arrays representing the model $xA^t + b$, the function returns the corresponding torch model - :param A: - :param b: - :return: - """ - output_dimension, input_dimension = tuple(A.shape) - model = torch.nn.Linear(input_dimension, output_dimension) - model.eval() - model.weight.data = torch.as_tensor(A, dtype=torch.get_default_dtype()) - model.bias.data = torch.as_tensor(b, dtype=torch.get_default_dtype()) - return model - - -@pytest.fixture -def model_data(request): - dimension, condition_number, train_size = request.param - A, b = linear_model(dimension, condition_number) - x = torch.rand(train_size, dimension[-1]) - y = torch.rand(train_size, dimension[0]) - torch_model = linear_torch_model_from_numpy(A, b) - vec = flatten_dimensions( - tuple( - torch.rand(*p.shape) - for name, p in torch_model.named_parameters() - if p.requires_grad - ) - ) - H_analytical = linear_hessian_analytical((A, b), x.numpy()) - H_analytical = torch.as_tensor(H_analytical) - return torch_model, x, y, vec, H_analytical.to(torch.float32) - - @pytest.mark.torch @pytest.mark.parametrize( "model_data, tol", diff --git a/tests/value/__init__.py b/tests/value/__init__.py index 19a703d2d..a49cd76e2 100644 --- a/tests/value/__init__.py +++ b/tests/value/__init__.py @@ -4,7 +4,7 @@ from scipy.stats import spearmanr from pydvl.utils import Utility -from pydvl.value import ValuationResult +from pydvl.value.result import ValuationResult def polynomial(coefficients, x): From 6916ee266fad895fcadc93bdfbb0552c392d1d20 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 13:35:54 +0100 Subject: [PATCH 09/21] Drop requirement numpy <2 --- requirements.txt | 4 ++-- .../valuation/methods/_utility_values_and_sample_masks.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0a08a506a..7b3ada654 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ pyDeprecate>=0.3.2 -numpy>=1.20,<2 +numpy>=1.20 pandas>=1.3 scikit-learn scipy>=1.7.0 @@ -8,4 +8,4 @@ joblib @ git+https://github.com/joblib/joblib@c2087dbdeec9824c45822670395a8a0c45 cloudpickle tqdm matplotlib -typing_extensions \ No newline at end of file +typing_extensions diff --git a/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py b/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py index 0d52b702d..c389254c7 100644 --- a/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py +++ b/src/pydvl/valuation/methods/_utility_values_and_sample_masks.py @@ -104,4 +104,4 @@ def _create_mask_and_utility_values( masks.extend(m) u_values.extend(v) - return np.array(u_values), np.row_stack(masks) + return np.array(u_values), np.vstack(masks) From 0562b800dd9f76aed668d5f488603b128b2d69b1 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 13:36:15 +0100 Subject: [PATCH 10/21] Add extra deps as .txt for convenient installation --- requirements-extras.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 requirements-extras.txt diff --git a/requirements-extras.txt b/requirements-extras.txt new file mode 100644 index 000000000..4af782d74 --- /dev/null +++ b/requirements-extras.txt @@ -0,0 +1,6 @@ +dask>=2023.5.0 +distributed>=2023.5.0 +ray>=0.8 +torch>=2.0.0 +zarr>=2.16.1 +pymemcache>=3 From 9e3d004e9627bf97ba0e2ca715f3b37406112a56 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 13:45:32 +0100 Subject: [PATCH 11/21] Members defined within an enum class should not include explicit type annotations --- src/pydvl/influence/torch/util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py index c1b66ac93..bf182aeb1 100644 --- a/src/pydvl/influence/torch/util.py +++ b/src/pydvl/influence/torch/util.py @@ -625,14 +625,14 @@ class BlockMode(Enum): FULL: Groups all parameters together. """ - LAYER_WISE: str = "layer_wise" - PARAMETER_WISE: str = "parameter_wise" - FULL: str = "full" + LAYER_WISE = "layer_wise" + PARAMETER_WISE = "parameter_wise" + FULL = "full" class SecondOrderMode(Enum): - HESSIAN: str = "hessian" - GAUSS_NEWTON: str = "gauss_newton" + HESSIAN = "hessian" + GAUSS_NEWTON = "gauss_newton" @dataclass From 2839f81fada52a892162876a91b85486f63e3b05 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 13:47:27 +0100 Subject: [PATCH 12/21] Hack --- src/pydvl/parallel/futures/ray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pydvl/parallel/futures/ray.py b/src/pydvl/parallel/futures/ray.py index 002d71c04..258fd6b3d 100644 --- a/src/pydvl/parallel/futures/ray.py +++ b/src/pydvl/parallel/futures/ray.py @@ -240,7 +240,7 @@ def set_future(data: Any) -> None: self.future.object_ref = ref # type: ignore if sys.version_info >= (3, 9): - __class_getitem__ = classmethod(types.GenericAlias) + __class_getitem__ = classmethod(types.GenericAlias) # type: ignore class _WorkItemManagerThread(threading.Thread): From ca32a66e9313f375236dd37193b5b28ff603cc78 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 14:19:31 +0100 Subject: [PATCH 13/21] Revert to requirement numpy<2 because of dask --- requirements-constraints.txt | 1 + requirements-notebooks.txt | 3 ++- requirements-type-checking.txt | 1 + requirements.txt | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 requirements-constraints.txt diff --git a/requirements-constraints.txt b/requirements-constraints.txt new file mode 100644 index 000000000..4d3693d60 --- /dev/null +++ b/requirements-constraints.txt @@ -0,0 +1 @@ +numpy<2 diff --git a/requirements-notebooks.txt b/requirements-notebooks.txt index ac589844b..b9f3e96d0 100644 --- a/requirements-notebooks.txt +++ b/requirements-notebooks.txt @@ -1,5 +1,6 @@ datasets==2.21.0 -distributed==2023.5.0 +dask==2024.8.0 +distributed==2024.8.0 pillow==10.4.0 torch==2.2.0 torchvision==0.17.0 diff --git a/requirements-type-checking.txt b/requirements-type-checking.txt index 805e73061..8fb920225 100644 --- a/requirements-type-checking.txt +++ b/requirements-type-checking.txt @@ -1,3 +1,4 @@ mypy==1.14 types-tqdm pandas-stubs +-c requirements-constraints.txt diff --git a/requirements.txt b/requirements.txt index 7b3ada654..d0beb4ebb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ pyDeprecate>=0.3.2 -numpy>=1.20 +numpy>=1.20,<2 pandas>=1.3 scikit-learn scipy>=1.7.0 From 21fd8830830a2474e69d92b543ee4baac58e613e Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 14:23:19 +0100 Subject: [PATCH 14/21] More numpy 2 compatibility changes --- src/pydvl/valuation/result.py | 10 +++---- src/pydvl/valuation/samplers/permutation.py | 2 +- tests/valuation/test_result.py | 30 ++++++++++----------- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/pydvl/valuation/result.py b/src/pydvl/valuation/result.py index bde8e1bcc..94c51b9d2 100644 --- a/src/pydvl/valuation/result.py +++ b/src/pydvl/valuation/result.py @@ -238,15 +238,11 @@ def __init__( self._algorithm = algorithm self._status = Status(status) # Just in case we are given a string - self._values = np.array(values, copy=False) + self._values = np.asarray(values) self._variances = ( - np.zeros_like(values) - if variances is None - else np.array(variances, copy=False) - ) - self._counts = ( - np.ones_like(values) if counts is None else np.array(counts, copy=False) + np.zeros_like(values) if variances is None else np.asarray(variances) ) + self._counts = np.ones_like(values) if counts is None else np.asarray(counts) self._sort_order = None self._extra_values = extra_values or {} diff --git a/src/pydvl/valuation/samplers/permutation.py b/src/pydvl/valuation/samplers/permutation.py index a14806a03..79b398c8d 100644 --- a/src/pydvl/valuation/samplers/permutation.py +++ b/src/pydvl/valuation/samplers/permutation.py @@ -129,7 +129,7 @@ class DeterministicPermutationSampler(PermutationSampler): def _generate(self, indices: IndexSetT) -> SampleGenerator: for permutation in permutations(indices): - yield Sample(-1, np.array(permutation, copy=False)) + yield Sample(-1, np.asarray(permutation)) def sample_limit(self, indices: IndexSetT) -> int: if len(indices) == 0: diff --git a/tests/valuation/test_result.py b/tests/valuation/test_result.py index c14dc0a78..7f8a4ec58 100644 --- a/tests/valuation/test_result.py +++ b/tests/valuation/test_result.py @@ -31,19 +31,19 @@ def dummy_values(values, names): ) def test_sorting(values, names, ranks_asc, dummy_values): dummy_values.sort(key="value") - assert np.alltrue([it.value for it in dummy_values] == sorted(values)) - assert np.alltrue(dummy_values.indices == ranks_asc) - assert np.alltrue( + assert np.all([it.value for it in dummy_values] == sorted(values)) + assert np.all(dummy_values.indices == ranks_asc) + assert np.all( [it.value for it in reversed(dummy_values)] == sorted(values, reverse=True) ) dummy_values.sort(reverse=True) - assert np.alltrue([it.value for it in dummy_values] == sorted(values, reverse=True)) - assert np.alltrue(dummy_values.indices == list(reversed(ranks_asc))) + assert np.all([it.value for it in dummy_values] == sorted(values, reverse=True)) + assert np.all(dummy_values.indices == list(reversed(ranks_asc))) dummy_values.sort(key="index") - assert np.alltrue(dummy_values.indices == list(range(len(values)))) - assert np.alltrue([it.value for it in dummy_values] == values) + assert np.all(dummy_values.indices == list(range(len(values)))) + assert np.all([it.value for it in dummy_values] == values) @pytest.mark.parametrize( @@ -55,16 +55,16 @@ def test_dataframe_sorting(values, names, ranks_asc, dummy_values): import pandas # noqa: F401 df = dummy_values.to_dataframe(use_names=False) - assert np.alltrue(df.index.values == ranks_asc) + assert np.all(df.index.values == ranks_asc) df = dummy_values.to_dataframe(use_names=True) - assert np.alltrue(df.index.values == sorted_names) - assert np.alltrue(df["dummy_valuator"].values == sorted(values)) + assert np.all(df.index.values == sorted_names) + assert np.all(df["dummy_valuator"].values == sorted(values)) dummy_values.sort(reverse=True) df = dummy_values.to_dataframe(use_names=True) - assert np.alltrue(df.index.values == list(reversed(sorted_names))) - assert np.alltrue(df["dummy_valuator"].values == sorted(values, reverse=True)) + assert np.all(df.index.values == list(reversed(sorted_names))) + assert np.all(df["dummy_valuator"].values == sorted(values, reverse=True)) except ImportError: pass @@ -87,15 +87,15 @@ def test_todataframe(ranks_asc, dummy_values): df = dummy_values.to_dataframe() assert "dummy_valuator" in df.columns assert "dummy_valuator_stderr" in df.columns - assert np.alltrue(df.index.values == ranks_asc) + assert np.all(df.index.values == ranks_asc) df = dummy_values.to_dataframe(column="val") assert "val" in df.columns assert "val_stderr" in df.columns - assert np.alltrue(df.index.values == ranks_asc) + assert np.all(df.index.values == ranks_asc) df = dummy_values.to_dataframe(use_names=True) - assert np.alltrue(df.index.values == [it.name for it in dummy_values]) + assert np.all(df.index.values == [it.name for it in dummy_values]) @pytest.mark.parametrize( From 61448d2c3276652ac022ba505402a47526c51ffd Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 15:22:31 +0100 Subject: [PATCH 15/21] Fix ruff config: avoid messing up fixture imports, don't auto-fix --- .pre-commit-config.yaml | 6 +++++- pyproject.toml | 11 ++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 52cb7ad7c..19f5c4857 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,11 @@ repos: rev: v0.9.1 hooks: - id: ruff - args: [ --fix ] + # HACK: ruff-pre-commit ignores pyproject.toml + # https://github.com/astral-sh/ruff-pre-commit/issues/54 + args: [ "--extend-per-file-ignores", "tests/**/*.py:F811", + "--extend-per-file-ignores", "tests/**/*.py:F401", + "--fix" ] - id: ruff-format - repo: https://github.com/kynan/nbstripout rev: 0.6.1 diff --git a/pyproject.toml b/pyproject.toml index 3404cdf7e..90b264453 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ exclude_lines = [ [tool.ruff] line-length = 88 -fix = true +fix = false target-version = "py39" [tool.ruff.lint] @@ -69,11 +69,16 @@ ignore = [ "E501", # line too long (HACK: we should really stick to 88 chars) "E741", # Checks for 'l', 'O', or 'I' as variable names (hard to read) "E731", # Do not assign a `lambda` expression, use a `def` - "E402" - ] isort.known-first-party = ["pydvl"] +[tool.ruff.lint.extend-per-file-ignores] +"__init__.py" = ["E402"] +"tests/influence/*.py" = ["E402"] +# Avoid removing imports of fixtures in tests +# (https://github.com/astral-sh/ruff/issues/10662) +"tests/**/*.py" = ["F811", "F401"] + [tool.mypy] python_version = "3.9" mypy_path = './src/' From 2fb431859a3e932e5fa614b336c9b5c458cbe54b Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 15:22:35 +0100 Subject: [PATCH 16/21] =?UTF-8?q?Missing=20fixture=20import=20(removed=20b?= =?UTF-8?q?y=20ruff=20=F0=9F=99=84)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/influence/test_influence_calculator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/influence/test_influence_calculator.py b/tests/influence/test_influence_calculator.py index 2ef667b76..bfd976e2a 100644 --- a/tests/influence/test_influence_calculator.py +++ b/tests/influence/test_influence_calculator.py @@ -31,6 +31,7 @@ TorchNumpyConverter, ) from pydvl.influence.types import UnsupportedInfluenceModeException +from tests.influence.torch.test_influence_model import model_and_data, test_case from tests.influence.torch.test_util import are_active_layers_linear From 748f669c32b8dbd42a8e52fe60911656b2e27994 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 16:23:14 +0100 Subject: [PATCH 17/21] Limit zarr version to < 3 --- requirements-extras.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-extras.txt b/requirements-extras.txt index 4af782d74..41a57fc2f 100644 --- a/requirements-extras.txt +++ b/requirements-extras.txt @@ -2,5 +2,5 @@ dask>=2023.5.0 distributed>=2023.5.0 ray>=0.8 torch>=2.0.0 -zarr>=2.16.1 +zarr>=2.16.1,<3 pymemcache>=3 diff --git a/setup.py b/setup.py index 270d16cdf..32063aed5 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "torch>=2.0.0", "dask>=2023.5.0", "distributed>=2023.5.0", - "zarr>=2.16.1", + "zarr>=2.16.1,<3", ], "ray": ["ray>=0.8"], }, From 1c0b5bafce36dd818769291f11c22eec44ac59c0 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 16:27:41 +0100 Subject: [PATCH 18/21] Use updated joblib version for generator_unordered --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d0beb4ebb..5f3492dd9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ pandas>=1.3 scikit-learn scipy>=1.7.0 cvxpy>=1.3.0 -joblib @ git+https://github.com/joblib/joblib@c2087dbdeec9824c45822670395a8a0c45be2211 +joblib>=1.4.0 cloudpickle tqdm matplotlib From 504a6391f6594b89ee964b490c1bee3d1ba3a6b1 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 18:28:18 +0100 Subject: [PATCH 19/21] Remove import --- src/pydvl/value/shapley/knn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pydvl/value/shapley/knn.py b/src/pydvl/value/shapley/knn.py index c7eee1974..9843052b1 100644 --- a/src/pydvl/value/shapley/knn.py +++ b/src/pydvl/value/shapley/knn.py @@ -17,7 +17,6 @@ from typing import Dict, Union import numpy as np -from numpy.typing import NDArray from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors from tqdm.auto import tqdm From f4201a21fb79d247b590ea310f0a74a2598b7647 Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 19:35:59 +0100 Subject: [PATCH 20/21] Reduce n_epochs in CI for msr_banzhaf_digits.ipynb --- notebooks/msr_banzhaf_digits.ipynb | 104 ++++++++--------------------- 1 file changed, 26 insertions(+), 78 deletions(-) diff --git a/notebooks/msr_banzhaf_digits.ipynb b/notebooks/msr_banzhaf_digits.ipynb index ce453b630..3a5bf7595 100644 --- a/notebooks/msr_banzhaf_digits.ipynb +++ b/notebooks/msr_banzhaf_digits.ipynb @@ -43,12 +43,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "tags": [ - "hide" - ] - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", @@ -70,7 +66,6 @@ "\n", "is_CI = os.environ.get(\"CI\")\n", "random_state = 24\n", - "n_jobs = 16\n", "random.seed(random_state)" ] }, @@ -123,53 +118,31 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [ - "hide" - ] - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "# In CI we only use a subset of the training set\n", + "# Reduce computation time for CI\n", "training_data = list(training_data)\n", "if is_CI:\n", " training_data[0] = training_data[0][:10]\n", " training_data[1] = training_data[1][:10]\n", " max_checks = 1\n", + " n_jobs = 2\n", + " n_epochs = 1\n", "else:\n", " training_data[0] = training_data[0][:200]\n", " training_data[1] = training_data[1][:200]\n", - " max_checks = 1000" + " max_checks = 1000\n", + " n_jobs = 16\n", + " n_epochs = 40" ] }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "tags": [ - "hide-input", - "invertible-output" - ] - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWkAAAGJCAYAAABIP8LMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAoOUlEQVR4nO3de7xcZX3v8c+XcFHITe5CihsEjx6lIFAEixLFCwdsA1bFCy1R+zqCWI3YVyuvXtjgBauUi7XV1iqJp6hYL0GliFJJCgjkQEn0gBRtSBBIuEnCTQIkv/PH80wyTGZm71l7Ls/s/X2/XvPae69Zz3qeWfOb76xZa+01igjMzKxM2wx6AGZm1ppD2sysYA5pM7OCOaTNzArmkDYzK5hD2sysYA5pM7OCOaTNzArmkDYzK5hDugskrZK0qofLXygpJI30qo/JTNJ2ks6W9AtJG/K6PGHQ4+oVSSP5MS4c9FgaSZqfxzZ/0GMZFh2HdF7BY93mdn+oZpV9BPhr4F7gPOBs4PaBjmiC8utsyaDHMSiSRoclayY61m0n0PfZbe5bNYHl2tbOBD4F3DPogQypNwGPAa+PiKcGPRizTlQO6YgY7eI4rI2IWAOsGfQ4hthewEMOaBtKEdHRDYjUbFzz7gusA34NvKDhvp2AnwMbgbl1019E2mq8CXgA2ACsBv4JmNOkj7l5TKPAYcAPgPXAw8C3gN/K8+0HfD0v8zfA1cBBTZa3MC9vP+AM0sfiJ4G7gQuAmU3arAJWtVgH78h9rcvL+Tnwl8AOHazz2phG6qaN5GkLgRcC3wQeAh4Ffgi8LM+3W153a3L//xd4TZM+9iLtErgOWAs8Rdo98FXgf7YYl4APAbflZd8DfA6Y1a11ArwK+F5e/xvy2G4AzupgvTXeVjVZhy8CLgXuBzaRa5K0S/DUvN4eAx7Pv58GbNPi9bEE2AP4MnBfbvMT4FV1tf8ZUl1vAG4F3jrOWpjf4jEFMNrkcY2Q6v7BvK5vAt7UZvkTrte8nP2BfyW9DmuP//i68c9vmP81pDq9DXiE9Br9f8BZwHOavN6aroMJ5IiAU/I4H8iP/VfAlcBJTeafQ6r1lXnZDwHfBX6n07GOuS47WfF1RTj+DuAtuc11wLZ10xfl6Wc1zP/RXCDfAT5L2od4BemFswbYu2H+uXk5l+cn9ge5zZV5+n8BL85Fei3wt6RA20R6QU5v8cK+LBfYPwJ/AyzP029qUTRbBRLpRRr5yf5S7vu6PO3q+vUxzrBpFtJL8mO7Ji//W/mxPQgcAPw3cAtwIfAVUvg+CezT0MfbgSfyevz7/Ji/ned/jOZvaP+Qx3BP3XN1B7AsT5vQOgGOJb2JP5zr5ZPAF4ClwH3jWG8nkN681+XbaL4taFiH1+Q+biS9EX8BOCTPc0me5668Di9gywvvkhavj+XAL5us9yeAg0hvMnfk9fxPpDfWTcAR43hMB+fHEHkco3W3uQ2P62pSjd+Qx70oP/cbaf5G3a16PYBUfwH8W37evgE8TQqyZiH9g/x4vkp6A/s74D/r+p5WN+8CUt3X3og2r4MJ5Mgn8/JW5uflk8DFpDeKbzbMe0h+fJvyMs/L41hHCuzjOhlrz0K6oTjqbx9t82I+N/99Sv77xzRsjQB703yL6g25uD7fIqQDeFfDfV/K038N/EXDfX+V7/tQi0B8kLqtf9IW1bfyfX/V0GYVDYHEli2GbwPPbbhvtFnfbdZ5bUzNQjraPLZfkwJnm7r7/jDfd0FDm92BGU36PogU0lc0TH8VW94EZ9dN3x74D+q2WKuuk7r1fVCTce3aQc1u9fw0WYefbHL/O/J9/0ndmzlpS/imfN87W7w+Wq33X5M+GTyn7r7auvxOh6/DJS3uq39cZzXc98Y8/d96WK8/bDY/MK9uXPMb7tsPUJNlfSzPf1KLMc1tMYZOc+Qh0qe1HdvVGmkX8S9Jb3ZHN8y3F2njZE1932ONdcz12XGD1h+1ard1Tdo8h7R1sQn4AOlFfz/w/A77/imwsmHa3NzvNU3mf3W+707q3onzfS/I913cMH0hTYK4rpA2Anc2TF/F1oF0C2nLYXaT5UwjvQksG+fjro1ppG7aSJvHtk++73Eagjf3/TRwdQfr/bu5KLerm/bPuY8/ajL/79I8pDtaJ2wJ6RdVKe52z0/DOlxL8xf0j/L9b2hy3zH5vh83eX20W+8B7NdkeXc21tUYj2k8Ib2qsTby/auBB3tRr6TdAEHaIm3W9xKahHSb5e2c5/9yw/RRKgYfzXPkofwctN2tw5Y3ms+0uP9D+f7jujHWiJjQgUN1MO+Tkk4ibX38XR7wWyIdEHsWSQLeRXpnPwh4HqlIalod/LmpybR788/lEbGx4b7amRJzWixvaeOEiFgp6VfAiKTZEbGuWUNJO+axPwgsSA9pKxuAl7TouxPNHlvtcd8REY/W3xERGyXdR5PHLel40v7Xw4Bd2frA8q5sOYD58vzz2iZjugF4pmHZVdbJJcCbgRslXUr62HtdRNzdrPEErIiIDU2mH0LasFjS5L6lpDfslze5r9163ykiVjZpcw/wio5GPbZmtQFpd8aRtT+6XK+b66JF30uAoxsnStqJFHAnkvYnzyDtJ67Zexx91y+v0xy5BPgT4DZJ3yA9v9dHxPqG+Wrr7QWSRpt0fUD++RLSrp4Jm8gpeJ26g/QO9krSwYEftpjvfNJ+nDWk/cr3kPY1Q1rhL2jRrnFlwpag2Oq+iHgmF+N2LZZ3X4vpa/MYZpH2QTXzPFKB7UY68NFL7R5bs3UCab0863FL+hBp/+nDpC3Iu0j7UIO0b/cgYIe6JrPyz63WUw6khxomd7xOIuLbkt5EOs/5PcD78lhvBs6MiB+NZznjsLbF9FnAr6PJWSF5HT9I2k3UqN16b3dft1+P69r0Vf8/Et2s15Z1kW21riVtR9r1eThpH/ClpIN3T+dZzuLZtTcenebIh0lb/+8m7c/+KPCMpH8DPhIRv8zz7ZJ/vnWM/qd3ON6W+hnSHyUF9IPAS0nn/n6ifgZJuwMfJD1Rr2zcGpH0jv4MFUhH5/+ryfQ9889WL7b6+26JiEO6OqoekLQt6SPZWtIBszUN9x/ZpNkj+ecepOKun38aqZjrz+uutE4i4nLg8ryl9QrSOc+nAd+X9PKIuG28y2rXTYvp64GdJW0XEU/X35HX2a5sWQ/DrJv1WlvWHi3u37PJtHmkgF4YEe+uv0PS8+nwjaNKjuSt/guBC3P7o0gH098KvFTSS/OnrdrjmxcR3+1kXFX15d/CJb0SOIcUei/LP8+WdFTDrPvlMf2wyYqdk+/vl2YfyfYDfou0f3Ndq4YR8RjptKqXStq5ZyPsnl2B2cBPmgT0dNLH/ka35J+NzyHAETRsAEx0nUTE4xHx44g4g3TkfXvgf3W6nA7dQqrHVze579Wkj8//2eMxtLKJZ398r6zL9bq5LvKbdaO5Tabtn39+u8l9W70Os9qulGZ9TChHIuL+iPh2RLyNtIX/QlJuQdqVB+lg73i1G+uYeh7Skp4HfI000LdHxH3ASaSPXF9tKIpV+eeznuAcFF+kv1v+H5K0+SORpG1IpwZtQzo1Zyznk4Lky5JmN94p6XmSStnKvp+0a+PQvK6BzR9DLyKFeKOv5J9/IWlWXZvtSSHaTEfrRNKr8xZro9pW2hMtH1F3fDn/PDfvt62Na0fSObiQziAahIdIGwzd0pV6zccLfkT6H4kPNCxjHs1Dd1X+Obdh/v1Ip4I2U9udtk+b5Y0rRyTtIOl3GxeS67+WT7Vau4x0Wuvpko5rNjBJR9bXyxhjHVPl0Gux07xmcUQsz79/mTS4D9amRcQKSR8hnQy+EPj9PH2tpK+TPmYsl/RD0j6u15POLlhOOk+0H67LY7iU9BHnjaT9sjcDnx6rcUR8WdKhwPuB/5Z0JWk/786kAn41KexP7c3wxy8iNkn6LGmX1M8kXUZ6wb6GNN6r8+/1bZZK+ifgfwO3SvoWaR/i75HW172krb36Np2uk88Ce0u6jvTCewo4FHgt6QyFr3dxNWwlIr6ag+Vt+TEuZss++n2BSyPikl6OoY1/B94u6Xukrfmngf+IiP+osrAu1+vpwPWkXQdvAFaQtpZPJJ1++HsN83+PdFrbGZIOJG2N70PatXU5zcPtalJ9nSvpZaRjKUTExyvkyHOBayX9kvT6Xk06I+31pAOA342In+flPy3pzaT93JdL+kle3hOkN83fIW2pP58twd5yrGOvSnpyCt7m02tIR0sDuKzFsr6d7/9w3bQdSfuqa+ci/op0cvku5NN3GpYxNy9jtMnyR/J9C9s8liUN0xbm6fuRDljV/uPwHtI+q07/4/BNwPdJW6tPkfb7LgM+Drx4nOu8NqaRiTy2duMlvWGfQTqo+5s8zv9DOsCyVf+5zTakAy63k47+35ufq1mkf9BYPpF1QgrHrwG/IJ22+QhpP+MngN06qNmmz89Y67DuMb6fdPbQE/l2MymIWv7HYSfjyPdtVdtjPKbdSf/4cR/pU+rm18A4aqNlX92o17yc/Un/NLaOdEri9bT/j8PfIp1hUTvAdyvwZ7kum65T4GRSQP4mzxN19407R0gH0f+M9I8pd+X5HyDt2jgV2L7F+v9Urscncn3+Ij/mk2n4x592Yx3rprwAy5Qu73gKsG9ErBrsaIaTpANIZ/N8PSL6ebDXbNLx9aStMkl75n319dN2JH3igPQvuWY2Af08EGeTzwLgHUrXNV5DOr3qGNI/ylxBusCOmU2AQ9om4kekg6lvIB1geoa0m+OzwIXhfWlmE+Z90mZmBfM+aTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgm076AGMRZKAvYBHBz0WG5cZwL0REYMeyLBz7Q+dntR+8SFNKtK7Bz0I68gc4J5BD2IScO0Pn67X/jCEdF+3Iq699tqO2xx44IE9GElz69evr9TuqKOO6rjNXXfdVakvvOXXLX1dj1Xq+P3vf3+lvt75zndWalfF8ccf33GbKjmQdf05G4aQ7qvp06d33GbmzJk9GElzVT9JbbONDz9Ye9OmTeu4zXOf+9xKffXzNbPttsMdc3155Uo6XdIqSU9KulHS4f3o12zQXPs2UT0PaUknAecDZwOHACuAKyXt3uu+zQbJtW/d0I8t6TOAL0bExRFxG3Aq8ATwnj70bTZIrn2bsJ6GtKTtgUOBq2rTImJT/vvIFm12kDSzdiOd1mI2VFz71i293pLeFZgG3Ncw/T5gzxZtzgTW1918CpINI9e+dUWJh/zPBWbV3eYMdjhmfePat630+tyUB4GNwB4N0/cA1jZrEBEbgA21v9M/XZkNHde+dUVPt6Qj4ingZuCY2jRJ2+S/r+9l32aD5Nq3bunHWd7nA4sk3QQsAxYAOwEX96Fvs0Fy7duE9TykI+JSSbsB55AOmCwHjo2IxgMqRZg9e3bHbVasWFGpr1WrVnXcZt68eZX6sv4bttpfvnx5x23mz59fqa8FCxZ03KbK+AAOPvjgjtssWbKkUl+90Jf/l4yIzwGf60dfZiVx7dtElXh2h5mZZQ5pM7OCOaTNzArmkDYzK5hD2sysYA5pM7OCOaTNzArmkDYzK5hD2sysYA5pM7OCOaTNzArmkDYzK1hfLrA0TKpcaWtkZKRSX3Pnzu24zbvf/e5KfVW54p5Zr1S52mSVNlD96nml8Ja0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMF9gqcHFF1/ccZvFixdX6mvRokUdt1m4cGGlvszGUuWCX1WNjo72ra9Zs2b1ra9e8Ja0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBfBW8BieeeGLHbVavXl2prxNOOKFSO7N2Zs+eXaldlas59vMKcxdddFGlduvXr+/ySPrLW9JmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBFBGDHkNbkmYCHV8hpepFZpYvX95xm6oXSlqyZEnHbUZHRyv1deGFF1ZqV9GsiHiknx1ORlVrv6qRkZGO21Stqyp9HXzwwZX66rOu1763pM3MCuaQNjMrmEPazKxgPQ1pSaOSouF2ey/7NCuBa9+6pR/fzHIr8Lq6v5/pQ59mJXDt24T1I6SfiYi1fejHrDSufZuwfuyTPkDSvZJWSrpE0j7tZpa0g6SZtRswow9jNOsF175NWK9D+kZgPnAscBqwL3CNpHbFdybp3NDa7e4ej9GsF1z71hU9DemIuCIi/jUifhoRVwLHAbOBt7Vpdi4wq+42p5djNOsF1751Sz/2SW8WEesk3QHs32aeDcCG2t+S+jE0s55y7VtVfT1PWtJ04IXAmn72azZorn2rqtfnSZ8n6WhJI5JeCXwH2Ah8rZf9mg2aa9+6pde7O+aQinIX4AHgWuCIiHigx/2aDZpr37qipyEdEW/v5fLbqXpluipXwavSpqp169b1rS+rbpC1X1WV2po3b16lvqq+PqciX7vDzKxgDmkzs4I5pM3MCuaQNjMrmEPazKxgDmkzs4I5pM3MCuaQNjMrmEPazKxgDmkzs4I5pM3MCuaQNjMrWF8v+t9PixcvrtRuwYIFHbeJiEp9rV+/vuM2/byYk00tIyMjHbdZunRppb4uu+yySu2mIm9Jm5kVzCFtZlYwh7SZWcEc0mZmBXNIm5kVzCFtZlYwh7SZWcEc0mZmBXNIm5kVzCFtZlYwh7SZWcEc0mZmBZu0F1iqetGjxx57rOM2jzzySKW+qrTbuHFjpb7MxlKlth5//PEejMTqqWqY9YukvYG7Bz0O68iciLhn0IMYdq79odT12h+GkBawF/Bok7tnkIp4Tov7p5JS1sUM4N4ovbCGQJvaL+W5LkFJ66IntV/87o78gJu+M6UaBuDRiKi2z2GSKGhdTOnnoZta1X5Bz/XAFbYuetK/DxyamRXMIW1mVrBhD+kNwNn551TndTF1+LneYtKvi+IPHJqZTWXDviVtZjapOaTNzArmkDYzK5hD2sysYA5pM7OCDW1ISzpd0ipJT0q6UdLhgx5Tv0kalRQNt9sHPS7rLdf+1Kr9oQxpSScB55POjzwEWAFcKWn3gQ5sMG4Fnl93O2qww7Fecu0/y5So/aEMaeAM4IsRcXFE3AacCjwBvGewwxqIZyJibd3twUEPyHrKtb/FlKj9oQtpSdsDhwJX1aZFxKb895GDGtcAHSDpXkkrJV0iaZ9BD8h6w7W/lSlR+0MX0sCuwDTgvobp9wF79n84A3UjMB84FjgN2Be4RtKMQQ7Kesa1v8WUqf3iL1VqrUXEFXV//lTSjcBq4G3AlwYzKrPem0q1P4xb0g8CG4E9GqbvAazt/3DKERHrgDuA/Qc8FOsN134Lk7n2hy6kI+Ip4GbgmNo0Sdvkv68f1LhKIGk68EJgzaDHYt3n2m9tMtf+sO7uOB9YJOkmYBmwANgJuHiQg+o3SecB3yN9zNuLdFrWRuBrgxyX9ZRrn6lV+0MZ0hFxqaTdgHNIB0yWA8dGROMBlcluDqkodwEeAK4FjoiIBwY6KusZ1/5mU6b2fT1pM7OCDd0+aTOzqcQhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJdIGkkfzPEn3ZxmXPzMud2a5lm3eba770pG9KS5udCOGzQY+kVSa+TdLWkByWtk7RM0h8Oelw2WJO99vNXizV+tVbt9otBj69TQ/lv4TY2Sb8PLCZdeGcUCNJlHL8iadeIuGBwozPrqQXA9IZpLwA+Dvyw76OZIIf05PUB0hXBXhsRGwAk/SNwO+li6Q5pm5QiYnHjNEl/mX+9pL+jmbgpu7tjPCRtL+kcSTdLWi/pcUnXSHpNmzYflrRa0m8kLZX0sibzvFjSNyX9On/j8015y3es8eyY2+46juHPBB6uBTRARDxDuibxb8bR3qawIa/9Zt4J3BkRP6nYfmAc0u3NBP4YWAL8OWm3wW6kb2c+uMn8fwR8EPh74FzgZcCPJW2+SLuklwI3AC8BPgV8BHgcWCzpxDHGczjwc9JW8liWAC+V9DFJ+0t6oaS/Ag4DPj2O9ja1DXPtP4ukl+c+v9pp2yJExJS8kT7yB3BYm3mmAds3TJtN+haML9VNG8nLegLYu2764Xn6+XXTrgJ+CuxQN03AdcAdddPm5rZzm0wbHcfj2wm4FNiU2wTpBTFv0Ovet8HeJnvtN3ks5+W2Lxn0uq9y85Z0GxGxMdK3YSBpG0k7k/bj3wQc0qTJ4oi4p679MtIXZh6Xl7Ez8FrgG8AMSbvmj2+7AFeSvv147zbjWRIRiojRcQx/A+nrhL4JvAM4OY/7XyQdMY72NoUNee1vlr+55u3ALRHx807alsIHDscg6RTSx7IXA9vV3XVnk9mbnd5zB+msCkjfvybgY/nWzO7APS3u68TngCOAQyJiE4CkbwC3AhcBr+hCHzaJDXHt1zsa2JshPlDukG5D0snAQtKpbJ8B7id9Rc+ZpO9T61Ttk8t5pK2HZn5ZYbnPIml74L3Ap2sBDRART0u6AviApO1rW0pmjYa19pt4F2mX39B+rZZDur23ACuBN0feuQUg6ewW8x/QZNqLgFX595X559MRcVW3BtnELqTndlqT+7YjvWCa3WdWM6y1v5mkHYA/AJZExL396LMXvE+6vY35p2oTJL0COLLF/CfU71eTdDhpt8IVABFxP+lo+fskPb+xsdJ317XUwWlI9wPrgBPzVnWt/XTg94DbI8Kn4Vk7w1r79Y4jHewcunOj63lLGt4j6dgm0y8Cvg+8GfiOpMuBfYFTgdvY+j+aIH1cu1bS54EdSP/59BDPPuXtdNKXZv5M0hdJWxh7kIp/DnBQm7EeDlxN+mbk0VYzRcRGpW9T/jhwg6SvkLac35v7OLlNHzZ1TLrab/Au0gH0b41z/iI5pOG0FtMX5tuewPuAN5IK9GTgraRTghp9hbT/awHpIMgy4AMRsaY2Q0TcpnTNhLNIp0LtQtryvYX0DdBdERGfkHQn8KHc1w6k05/eEhFDXbTWNZOy9gEkzQSOBy6PiPXdXHa/+dvCzcwK5n3SZmYFc0ibmRXMIW1mVjCHtJlZwRzSZmYFc0ibmRXMIW1mVjCHtJlZwRzSZmYFc0ibmRXMIW1mVjCHtJlZwRzSZmYFc0ibmRXMIW1mVjCHtJlZwRzSZmYFc0ibmRXMIW1mVjCHtJlZwYr/tnBJAvYCHh30WGxcZgD3hr/heMJc+0OnJ7VffEiTivTuQQ/COjIHuGfQg5gEXPvDp+u1Pwwh3detiOOOO67jNl/4whcq9TVr1qyO26xfv75SXwceeGDf+sJbft3S1/VYpUaOP/74Sn399m//dsdt9tlnn0p9VXmdVVkXWdefs2EI6b7abrvtOm4zc+bMSn1VaVf1k1T65GzW2rRp0zpu85znPKdSXzvuuGPHbaZPn16prxkzZlRqV4q+HDiUdLqkVZKelHSjpMP70a/ZoLn2baJ6HtKSTgLOB84GDgFWAFdK2r3XfZsNkmvfuqEfW9JnAF+MiIsj4jbgVOAJ4D3NZpa0g6SZtRvpiKnZMHLt24T1NKQlbQ8cClxVmxYRm/LfR7Zodiawvu7mo9s2dFz71i293pLeFZgG3Ncw/T5gzxZtzgVm1d3m9Gx0Zr3j2reuKO7sjojYAGyo/e2zEmyqcO1bM73ekn4Q2Ajs0TB9D2Btj/s2GyTXvnVFT0M6Ip4CbgaOqU2TtE3++/pe9m02SK5965Z+7O44H1gk6SZgGbAA2Am4uA99mw2Sa98mrOchHRGXStoNOId0wGQ5cGxENB5Q6aqFCxdWanfKKad03Oayyy6r1Nfy5cs7bnPWWWdV6uvoo4/uuE3Vx2XJoGp/ZGSkUrsqr5mDDjqoUl8rVqyo1K6KdevW9a2vXujLgcOI+BzwuX70ZVYS175NlK8nbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFay4b2bplqpXAlu0aFHHbRYsWFCprwsvvLDjNuvXr6/U19KlSyu1s+FT9apvVa5o52+P6T1vSZuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgWbtBdYmjt3bt/6Gh0drdTulFNO6bjNZZddVqkvmzqqXlysisWLF1dqV+UiUFUvZFb1glOl8Ja0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBHNJmZgVzSJuZFcwhbWZWMIe0mVnBJu1V8PrpwgsvrNSuytXKqlw5D6qNcf78+ZX6sqnj4IMP7ltfVV9nw17H3pI2MyuYQ9rMrGAOaTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgjmkzcwK5pA2MyuYQ9rMrGAOaTOzgjmkzcwKpogY9BjakjQTWD/ocbRT5UJJAKtWreq4zYIFCyr1dcEFF3TcRlKlvoBZEfFI1caW9Lv2R0dHO26zePHiSn1VuehR1dfZCSecUKldRV2vfW9Jm5kVzCFtZlYwh7SZWcF6GtKSRiVFw+32XvZpVgLXvnVLP76Z5VbgdXV/P9OHPs1K4Nq3CetHSD8TEWvHO7OkHYAd6ibN6P6QzPrCtW8T1o990gdIulfSSkmXSNpnjPnPJJ12VLvd3fMRmvWGa98mrNchfSMwHzgWOA3YF7hGUrsthHOBWXW3OT0eo1kvuPatK3q6uyMirqj786eSbgRWA28DvtSizQZgQ+3vCfxDhdnAuPatW/p6Cl5ErAPuAPbvZ79mg+bat6r6GtKSpgMvBNb0s1+zQXPtW1W9Pk/6PElHSxqR9ErgO8BG4Gu97Nds0Fz71i29PgVvDqkodwEeAK4FjoiIB3rcr9mgufatK3p94PDtvVx+L1S5EljVq2ydddZZHbepMj6ApUuXVmpn1Qxj7a9bt67jNkuWLOlbX8uXL6/U17DztTvMzArmkDYzK5hD2sysYA5pM7OCOaTNzArmkDYzK5hD2sysYA5pM7OCOaTNzArmkDYzK5hD2sysYA5pM7OCKSIGPYa2JM0kfd9bX8ybN6/jNosWLarU16xZszpus3r16kp9VbkI1AQuaDMrIh6p2tiSftf+yMhIx20WLlxYqa/Zs2d33KbqhcxWrVpVqV1FXa99b0mbmRXMIW1mVjCHtJlZwRzSZmYFc0ibmRXMIW1mVjCHtJlZwRzSZmYFc0ibmRXMIW1mVjCHtJlZwRzSZmYF23bQAyjN008/3XGbRx6pdj0VSR23efTRRyv1tXHjxkrtbOrYtGlTx20ef/zxSn1tu23n0VNlfJPBMFwFb2/g7kGPwzoyJyLuGfQghp1rfyh1vfaHIaQF7AU024ScQSriOS3un0pKWRczgHuj9MIaAm1qv5TnugQlrYue1H7xuzvyA276zlS3u+DRqX794oLWxZR+HrqpVe0X9FwPXGHroif9+8ChmVnBHNJmZgUb9pDeAJydf051XhdTh5/rLSb9uij+wKGZ2VQ27FvSZmaTmkPazKxgDmkzs4I5pM3MCja0IS3pdEmrJD0p6UZJhw96TP0maVRSNNxuH/S4rLdc+1Or9ocypCWdBJxPOvXmEGAFcKWk3Qc6sMG4FXh+3e2owQ7Hesm1/yxTovaHMqSBM4AvRsTFEXEbcCrwBPCewQ5rIJ6JiLV1twcHPSDrKdf+FlOi9ocupCVtDxwKXFWbFhGb8t9HDmpcA3SApHslrZR0iaR9Bj0g6w3X/lamRO0PXUgDuwLTgPsapt8H7Nn/4QzUjcB84FjgNGBf4BpJMwY5KOsZ1/4WU6b2i78KnrUWEVfU/flTSTcCq4G3AV8azKjMem8q1f4wbkk/CGwE9miYvgewtv/DKUdErAPuAPYf8FCsN1z7LUzm2h+6kI6Ip4CbgWNq0yRtk/++flDjKoGk6cALgTWDHot1n2u/tclc+8O6u+N8YJGkm4BlwAJgJ+DiQQ6q3ySdB3yP9DFvL9JpWRuBrw1yXNZTrn2mVu0PZUhHxKWSdgPOIR0wWQ4cGxGNB1QmuzmkotwFeAC4FjgiIh4Y6KisZ1z7m02Z2velSs3MCjZ0+6TNzKYSh7SZWcEc0mZmBXNIm5kVzCFtZlYwh7SZWcEc0mZmBXNIm5kVzCHdBZJG8tf3/GkXlzk3L3Nut5Zp1m2u/d6bsiEtaX4uhMMGPZZekHSipCvzRdE3SLpb0jclvWzQY7PBmgK13+z7D0PSk4MeWxVDee0OG5cDgYeBi0iXuNyT9BVLyyQdGRErBjk4sz44DXis7u+NgxrIRDikJ6mIOKdxmqR/Bu4mFe+pfR+UWX99czJ87+GU3d0xHpK2l3SOpJslrZf0uKRrJL2mTZsPS1ot6TeSljbbvSDpxXnXw68lPSnpJkm/P47x7Jjb7lrxId1P+tLS2RXb2xQxSWpfkmZKUgdtiuOQbm8m8MfAEuDPgVFgN+BKSQc3mf+PgA8Cfw+cC7wM+LGkzd+kIemlwA3AS4BPAR8BHgcWSzpxjPEcDvwc+MB4H4Ck2ZJ2k3Qg8M/5Mf37eNvblDX0tQ+sBNYDj0r6l/qxDBPv7mjvYWAkfyMGAJK+CNwO/Anw3ob59wcOiIh78rw/IH1h5p8DZ+R5LgLuAn4nIjbk+f6BdD3cvwG+0+XHcAPwP/LvjwEfZ5J9B5z1xDDX/sPA50jfVrMBeBVwOnC4pMMi4pEu9dMXDuk2ImIj+WBD/pqi2aRPHzcBhzRpsrhWpLn9svwFmccBZ0jaGXgt8NfAjIZvNr4SOFvS3vXLaBjPEqDTj27vJm0V7Zd/fy7pG6c3dbgcm0KGufYj4qKGSd+StAy4BHg/aSt+aDikxyDpFNLHshcD29XddWeT2X/RZNodpG8whrS1IeBj+dbM7kDTQq0iIjZ/952kr5M+MgJ07bxWm5yGvfbrRcRXJf0t8Doc0pOHpJOBhcBi4DOkA28bgTNJX3rZqdoxgPNIWw/N/LLCcsclIh6W9GPgXTikrY3JVvvZr4Cde9xH1zmk23sL6eDDm6Pue8Yknd1i/gOaTHsRsCr/vjL/fDoirurWIDv0XGDWgPq24TGpaj+f4TEC3NLvvifKZ3e0Vzv5ffO+MEmvAI5sMf8Jkvaum/dw4BXAFQARcT/paPn7JD2/sbHSF4y21MlpSJJ2bzJtBDiGtF/RrJ1hrv1myzqNdHbKD8ZqXxpvScN7JB3bZPpFwPeBNwPfkXQ5sC/pn0BuA6Y3afNL4FpJnwd2ABYADwGfrpvndNLR7J/lo+UrgT1IxT8HOKjNWA8HriZ9ff3oGI/rZ5L+nfRt0g+TtnTeS9q3+NEx2trUMFlrf7WkS4GfAU8CRwFvJ70W/nGMtsVxSKd32GYW5tuewPuAN5IK9GTgrcDcJm2+QjprYgHpIMgy4AMRsaY2Q0TcpnTNhLOA+aSvpL+f9DFsq/8SnIDPA8cDxwIzch8/BD4ZET/rYj82vCZr7V8CvBL4A+A5wGrSm8UnIuKJLvbTF6rb3WRmZoXxPmkzs4I5pM3MCuaQNjMrmEPazKxgDmkzs4I5pM3MCuaQNjMrmEPazKxgDmkzs4I5pM3MCuaQNjMrmEPazKxg/x89BghkxmQJjQAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Visualize some of the data\n", "fig, axes = plt.subplots(2, 2, figsize=(4, 4))\n", @@ -219,28 +192,15 @@ "from support.banzhaf import TorchCNNModel\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "model = TorchCNNModel(lr=0.001, epochs=40, batch_size=32, device=device)\n", + "model = TorchCNNModel(lr=0.001, epochs=n_epochs, batch_size=32, device=device)\n", "model.fit(x=training_data[0], y=training_data[1])" ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train Accuracy: 0.705\n", - "Test Accuracy: 0.630\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(f\"Train Accuracy: {model.score(x=training_data[0], y=training_data[1]):.3f}\")\n", "print(f\"Test Accuracy: {model.score(x=test_data[0], y=test_data[1]):.3f}\")" @@ -550,21 +510,9 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|█████████▉| 99.9/100 [00:59<00:00, 1.69%/s] \n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "anomalous_dataset = Dataset(\n", " x_train=x_train_anomalous,\n", @@ -574,7 +522,7 @@ ")\n", "\n", "anomalous_utility = Utility(\n", - " model=TorchCNNModel(),\n", + " model=TorchCNNModel(lr=0.001, epochs=n_epochs, batch_size=32, device=device),\n", " data=anomalous_dataset,\n", " scorer=Scorer(\"accuracy\", default=0.0, range=(0, 1)),\n", " cache_backend=MemcachedCacheBackend(MemcachedClientConfig()),\n", @@ -722,12 +670,12 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "utility = Utility(\n", - " model=TorchCNNModel(),\n", + " model=TorchCNNModel(lr=0.001, epochs=n_epochs, batch_size=32, device=device),\n", " data=dataset,\n", " scorer=Scorer(\"accuracy\", default=0.0, range=(0, 1)),\n", " cache_backend=MemcachedCacheBackend(MemcachedClientConfig()),\n", @@ -969,7 +917,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -983,7 +931,7 @@ " )\n", "else:\n", " utility = Utility(\n", - " model=TorchCNNModel(),\n", + " model=TorchCNNModel(lr=0.001, epochs=n_epochs, batch_size=32, device=device),\n", " data=dataset,\n", " scorer=Scorer(\"accuracy\", default=0.0, range=(0, 1)),\n", " )" @@ -991,7 +939,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ From a9c6201037c4750a636f1f133d307468fb7cb37f Mon Sep 17 00:00:00 2001 From: Miguel de Benito Delgado Date: Sun, 12 Jan 2025 19:58:58 +0100 Subject: [PATCH 21/21] [skip ci] Update changelog --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ec27638d..9a5728726 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,14 +48,19 @@ - Fix a bug in the calculation of variance estimates for MSR Banzhaf [PR #605](https://github.com/aai-institute/pyDVL/pull/605) - Fix a bug in KNN Shapley values. See [Issue 613](https://github.com/aai-institute/pyDVL/issues/613) for details. - +- Backport the KNN Shapley fix to the `value` module + [PR #633](https://github.com/aai-institute/pyDVL/pull/633) ### Changed - Use tighter bounds for the calculation of the minimal sample size that guarantees an epsilon-delta approximation in group testing (Jia et al. 2023) [PR #602](https://github.com/aai-institute/pyDVL/pull/602) +- Dropped black, isort and pylint from the CI pipeline, in favour of ruff + [PR #633](https://github.com/aai-institute/pyDVL/pull/633) - **Breaking Changes** + - Dropped support for python 3.8 after EOL + [PR #633](https://github.com/aai-institute/pyDVL/pull/633) - Rename parameter `hessian_regularization` of `DirectInfluence` to `regularization` and change the type annotation to allow for block-wise regularization parameters