Skip to content

Commit

Permalink
Fix SYNERGY visible in Oracle mode setup (asreview#1556)
Browse files Browse the repository at this point in the history
J535D165 authored Nov 2, 2023
1 parent 0444692 commit 58a49b2
Showing 12 changed files with 28 additions and 66 deletions.
10 changes: 9 additions & 1 deletion asreview/datasets.py
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
import json
import socket
import tempfile
import warnings
from abc import ABC
from abc import abstractmethod
from pathlib import Path
@@ -737,14 +738,21 @@ def __init__(self):

class BenchmarkDataGroup(BaseDataGroup):
"""Datasets available in the benchmark platform.
Deprecated
"""

group_id = "benchmark"
description = "DEPRECATED: Datasets available in the online benchmark platform"

def __init__(self):

warnings.warn(
"The use of 'benchmark' datasets is deprecated, "
"use SYNERGY dataset instead. For more information, see "
"https://github.com/asreview/synergy-dataset.",
category=UserWarning
)

meta_file = "https://raw.githubusercontent.com/asreview/systematic-review-datasets/master/index_v1.json" # noqa
datasets = _download_from_metadata(meta_file)

23 changes: 2 additions & 21 deletions asreview/entry_points/simulate.py
Original file line number Diff line number Diff line change
@@ -42,26 +42,6 @@
from asreview.utils import get_random_state


def _get_dataset_path_from_args(args_dataset):
"""Remove 'benchmark:' from the dataset name and add .csv suffix.
Parameters
----------
args_dataset : str
Name of the dataset.
Returns
-------
str
Dataset name without 'benchmark:' if it started with that,
and with .csv suffix.
"""
if args_dataset.startswith("benchmark:"):
args_dataset = args_dataset[10:]

return Path(args_dataset).with_suffix(".csv").name


def _set_log_verbosity(verbose):
if verbose == 0:
logging.getLogger().setLevel(logging.WARNING)
@@ -135,7 +115,8 @@ def execute(self, argv): # noqa
)

# Add the dataset to the project file.
dataset_path = _get_dataset_path_from_args(args.dataset)
dataset_path = Path(
args.dataset.replace(":", "-")).with_suffix(".csv").name

as_data.to_file(Path(fp_tmp_simulation, "data", dataset_path))
# Update the project.json.
4 changes: 3 additions & 1 deletion asreview/webapp/api/projects.py
Original file line number Diff line number Diff line change
@@ -305,7 +305,9 @@ def api_demo_data_project(): # noqa: F401
if subset == "plugin":
try:
result_datasets = manager.list(
exclude=["builtin", "benchmark", "benchmark-nature"]
exclude=[
"builtin", "synergy", "benchmark", "benchmark-nature"
]
)

except Exception as err:
2 changes: 1 addition & 1 deletion asreview/webapp/tests/test_api/test_projects.py
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@
# NOTE: I don't see a plugin that can be used for testing
# purposes
UPLOAD_DATA = [
{"benchmark": "benchmark:Hall_2012"},
{"benchmark": "synergy:van_der_Valk_2021"},
{
"url": "https://raw.githubusercontent.com/asreview/"
+ "asreview/master/tests/demo_data/generic_labels.csv"
2 changes: 1 addition & 1 deletion docs/source/reference.rst
Original file line number Diff line number Diff line change
@@ -58,7 +58,7 @@ Available datasets
.. autosummary::
:toctree: generated/

asreview.datasets.BenchmarkDataGroup
asreview.datasets.SynergyDataGroup
asreview.datasets.NaturePublicationDataGroup

Dataset managers
12 changes: 5 additions & 7 deletions docs/source/simulation_cli.rst
Original file line number Diff line number Diff line change
@@ -62,23 +62,21 @@ Dataset

.. option:: dataset

Required. File path or URL to the dataset or one of the benchmark datasets.
Required. File path or URL to the dataset or one of the SYNERGY datasets.

You can also use one of the :ref:`benchmark-datasets <data_labeled:fully
labeled data>` (see `index.csv
<https://github.com/asreview/systematic-review-datasets/blob/master/index.csv>`_
for dataset IDs). Use the following command and replace ``DATASET_ID`` by the
You can also use one of the :ref:`SYNERGY datasets <data_labeled:fully
labeled data>`. Use the following command and replace ``DATASET_ID`` by the
dataset ID.

.. code:: bash
asreview simulate benchmark:DATASET_ID
asreview simulate synergy:DATASET_ID
For example:

.. code:: bash
asreview simulate benchmark:van_de_Schoot_2017 -s myreview.asreview
asreview simulate synergy:van_de_schoot_2018 -s myreview.asreview
Active learning
2 changes: 1 addition & 1 deletion docs/source/simulation_overview.rst
Original file line number Diff line number Diff line change
@@ -41,7 +41,7 @@ inspection
Datasets for simulation
-----------------------

Simulations require :ref:`fully labeled datasets <data_labeled:fully labeled data>` (labels: ``0`` = irrelevant, ``1`` = relevant). Such a dataset can be the result of an earlier study. ASReview offers also fully labeled datasets via the `benchmark platform <https://github.com/asreview/systematic-review-datasets>`_. These datasets are available via the user interface in the *Data* step of the setup and in the command line with the prefix `benchmark:` (e.g. `benchmark:van_de_schoot_2017`).
Simulations require :ref:`fully labeled datasets <data_labeled:fully labeled data>` (labels: ``0`` = irrelevant, ``1`` = relevant). Such a dataset can be the result of an earlier study. ASReview also offers fully labeled datasets via the `SYNERGY dataset <https://github.com/asreview/synergy-dataset>`_. These datasets are available via the user interface in the *Data* step of the setup and in the command line with the prefix ``synergy:`` (e.g. ``synergy:van_de_schoot_2018``).

.. tip::

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -199,9 +199,9 @@ def get_cmdclass():
".xlsx = asreview.io:ExcelWriter",
],
"asreview.datasets": [
"benchmark = asreview.datasets:BenchmarkDataGroup",
"benchmark-nature = asreview.datasets:NaturePublicationDataGroup",
"synergy = asreview.datasets:SynergyDataGroup",
"benchmark = asreview.datasets:BenchmarkDataGroup",
],
"asreview.models.classifiers": [
"svm = asreview.models.classifiers:SVMClassifier",
5 changes: 0 additions & 5 deletions tests/test_data.py
Original file line number Diff line number Diff line change
@@ -39,11 +39,6 @@ def test_fuzzy_finder(keywords, paper_id):
@mark.parametrize(
"data_name",
[
# datasets from the datasets repo
"benchmark:van_de_Schoot_2017",
"benchmark:Hall_2012",
"benchmark:Cohen_2006_ACEInhibitors",
"benchmark:Bos_2018",
# datasets from the Van de Schoot et al. paper
# https://github.com/asreview/paper-asreview/blob/master/index_v1.json
"benchmark-nature:van_de_Schoot_2017",
15 changes: 0 additions & 15 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
@@ -6,21 +6,6 @@
from asreview.datasets import NaturePublicationDataGroup


@pytest.mark.parametrize(
    "data_id",
    [
        "benchmark:van_de_Schoot_2017",
        "benchmark:Hall_2012",
        "benchmark:Cohen_2006_ACEInhibitors",
        "benchmark:Bos_2018",
    ],
)
def test_datasets(data_id):
    """Check that each benchmark dataset ID resolves via ``DatasetManager``.

    The dataset found must have a file path under the asreview
    raw.githubusercontent.com URL and must carry a title.
    """
    expected_prefix = "https://raw.githubusercontent.com/asreview/"

    dataset = DatasetManager().find(data_id)

    assert dataset.filepath.startswith(expected_prefix)
    assert dataset.title is not None


def test_group():
group_nature = NaturePublicationDataGroup()

10 changes: 5 additions & 5 deletions tests/test_models.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@

DATASET="van_de_schoot_2017"
DATASET="van_de_schoot_2018"

QUERY_STRATEGIES=('max_random' 'max_uncertainty' 'max' 'uncertainty' 'random')
# ('max_random' 'max_uncertainty' 'max' 'uncertainty' 'random' 'cluster')

for qs in "${QUERY_STRATEGIES[@]}"
do
asreview simulate benchmark:${DATASET} -q $qs --seed 535 --init_seed 535 -s ${DATASET}_${qs}.asreview
asreview simulate synergy:${DATASET} -q $qs --seed 535 --init_seed 535 -s ${DATASET}_${qs}.asreview
asreview plot recall ${DATASET}_${qs}.asreview -o ${DATASET}_${qs}_recall.png
done

@@ -16,7 +16,7 @@ BALANCE_STRATEGIES=('double' 'simple' 'undersample')

for bs in "${BALANCE_STRATEGIES[@]}"
do
asreview simulate benchmark:${DATASET} -q $bs --seed 535 --init_seed 535 -s ${DATASET}_${bs}.asreview
asreview simulate synergy:${DATASET} -q $bs --seed 535 --init_seed 535 -s ${DATASET}_${bs}.asreview
asreview plot recall ${DATASET}_${bs}.asreview -o ${DATASET}_${bs}_recall.png
done

@@ -27,7 +27,7 @@ MODELS=('logistic' 'nb' 'rf' 'svm')

for m in "${MODELS[@]}"
do
asreview simulate benchmark:${DATASET} -q $m --seed 535 --init_seed 535 -s ${DATASET}_${m}.asreview
asreview simulate synergy:${DATASET} -q $m --seed 535 --init_seed 535 -s ${DATASET}_${m}.asreview
asreview plot recall ${DATASET}_${m}.asreview -o ${DATASET}_${m}_recall.png
done

@@ -38,7 +38,7 @@ FEATURE_STRATEGIES=('tfidf')

for fs in "${FEATURE_STRATEGIES[@]}"
do
asreview simulate benchmark:${DATASET} -q $fs --seed 535 --init_seed 535 -s ${DATASET}_${fs}.asreview
asreview simulate synergy:${DATASET} -q $fs --seed 535 --init_seed 535 -s ${DATASET}_${fs}.asreview
asreview plot recall ${DATASET}_${fs}.asreview -o ${DATASET}_${fs}_recall.png
done

7 changes: 0 additions & 7 deletions tests/test_simulate.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@
import pytest

from asreview.entry_points.simulate import SimulateEntryPoint
from asreview.entry_points.simulate import _get_dataset_path_from_args
from asreview.entry_points.simulate import _simulate_parser
from asreview.project import ASReviewProject
from asreview.project import ProjectExistsError
@@ -337,9 +336,3 @@ def test_is_partial_simulation(tmpdir):
entry_point.execute(argv)

assert _is_partial_simulation(args) # noqa


def test_get_dataset_path_from_args():
    """Check the file name derived from plain, suffixed and prefixed names."""
    # Maps the raw dataset argument to the file name expected back.
    expectations = {
        "test": "test.csv",
        "test.ris": "test.csv",
        "benchmark:test": "test.csv",
    }
    for raw_name, stored_name in expectations.items():
        assert _get_dataset_path_from_args(raw_name) == stored_name

0 comments on commit 58a49b2

Please sign in to comment.