From 671c5e10faeaf46e647c71fdf92cc6f39cb9deb8 Mon Sep 17 00:00:00 2001 From: Mike Date: Fri, 26 Jul 2024 11:28:30 +1200 Subject: [PATCH] HDXDSYS-914 Keep crisis tags on dataset update (#72) * Keep crisis tags * Keep crisis tags tests * Add keep_crisis_tags option which by default is True * Fix PR concurrent workflow runs causing failure --- .github/workflows/run-python-tests.yaml | 4 ++ pyproject.toml | 2 +- requirements.txt | 14 +++---- src/hdx/data/dataset.py | 15 ++++++++ tests/hdx/data/__init__.py | 7 ++++ tests/hdx/data/test_dataset_core.py | 11 +++++- tests/hdx/data/test_dataset_noncore.py | 50 +++++++++++++++++++++---- tests/hdx/data/test_vocabulary.py | 7 ++++ 8 files changed, 93 insertions(+), 17 deletions(-) diff --git a/.github/workflows/run-python-tests.yaml b/.github/workflows/run-python-tests.yaml index a3e36f0..8572447 100644 --- a/.github/workflows/run-python-tests.yaml +++ b/.github/workflows/run-python-tests.yaml @@ -13,6 +13,10 @@ on: branches-ignore: - gh-pages +concurrency: + group: ${{ github.head_ref || github.ref_name }} + + jobs: build: runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index bb780b1..4fe626a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "ckanapi>=4.8", "defopt>=6.4.0", "email_validator", - "hdx-python-country>=3.7.6", + "hdx-python-country>=3.7.7", "hdx-python-utilities>=3.7.2", "libhxl>=5.2.1", "makefun", diff --git a/requirements.txt b/requirements.txt index ac33997..4d7df47 100755 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ click==8.1.7 # via typer coverage==7.6.0 # via pytest-cov -cryptography==42.0.8 +cryptography==43.0.0 # via pyopenssl defopt==6.4.0 # via hdx-python-api (pyproject.toml) @@ -56,7 +56,7 @@ google-auth-oauthlib==1.2.1 # via gspread gspread==6.1.2 # via hdx-python-api (pyproject.toml) -hdx-python-country==3.7.6 +hdx-python-country==3.7.7 # via hdx-python-api (pyproject.toml) hdx-python-utilities==3.7.2 # via @@ -150,13 +150,13 @@ pydantic-core==2.20.1 # via pydantic pygments==2.18.0 # via rich -pyopenssl==24.1.0 +pyopenssl==24.2.1 # via # hdx-python-api (pyproject.toml) # ndg-httpsclient pyphonetics==0.5.3 # via hdx-python-country -pytest==8.2.2 +pytest==8.3.2 # via # hdx-python-api (pyproject.toml) # pytest-cov @@ -202,7 +202,7 @@ rfc3986==2.0.0 # via frictionless rich==13.7.1 # via typer -rpds-py==0.19.0 +rpds-py==0.19.1 # via # jsonschema # referencing @@ -212,7 +212,7 @@ ruamel-yaml==0.18.6 # via hdx-python-utilities ruamel-yaml-clib==0.2.8 # via ruamel-yaml -setuptools==70.3.0 +setuptools==71.1.0 # via ckanapi shellingham==1.5.4 # via typer @@ -231,7 +231,7 @@ sphinxcontrib-napoleon==0.7 # via defopt stringcase==1.2.0 # via frictionless -structlog==24.2.0 +structlog==24.4.0 # via libhxl tableschema-to-template==0.0.13 # via hdx-python-utilities diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 5013c27..85129f5 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -986,6 +986,19 @@ def _dataset_hdx_update( match_resource_order, **kwargs, ) + keep_crisis_tags = kwargs.get("keep_crisis_tags", True) + if keep_crisis_tags: + for tag in self.data["tags"]: + tag_name = tag["name"] + if tag_name[:7] != "crisis-": + continue + found = False + for old_tag in self.old_data["tags"]: + if old_tag["name"] == tag_name: + found = True + break + if not found: + self.old_data["tags"].append(tag) self._prepare_hdx_call(self.old_data, kwargs) return self._revise_dataset( keys_to_delete, @@ -1021,6 +1034,7 @@ def update_in_hdx( 
create_default_views (bool): Whether to call package_create_default_resource_views. Defaults to True. hxl_update (bool): Whether to call package_hxl_update. Defaults to True. **kwargs: See below + keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True. updated_by_script (str): String to identify your script. Defaults to your user agent. batch (str): A string you can specify to show which datasets are part of a single batch update @@ -1077,6 +1091,7 @@ def create_in_hdx( create_default_views (bool): Whether to call package_create_default_resource_views (if updating). Defaults to True. hxl_update (bool): Whether to call package_hxl_update. Defaults to True. **kwargs: See below + keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True. updated_by_script (str): String to identify your script. Defaults to your user agent. batch (str): A string you can specify to show which datasets are part of a single batch update diff --git a/tests/hdx/data/__init__.py b/tests/hdx/data/__init__.py index 7ab16e4..78bf898 100755 --- a/tests/hdx/data/__init__.py +++ b/tests/hdx/data/__init__.py @@ -45,6 +45,13 @@ def json(self): "id": "aaafc63b-2234-48e3-8ccc-198d7cf0f3f3", "name": "political violence", }, + { + "state": "active", + "display_name": "crisis-somewhere", + "vocabulary_id": "4381925f-0ae9-44a3-b30d-cae35598757b", + "id": "9dae41e5-eacd-4fa5-91df-8d80cf579e52", + "name": "crisis-somewhere", + }, ] dataset_data = { diff --git a/tests/hdx/data/test_dataset_core.py b/tests/hdx/data/test_dataset_core.py index 99fa42b..c6f7ec9 100755 --- a/tests/hdx/data/test_dataset_core.py +++ b/tests/hdx/data/test_dataset_core.py @@ -649,13 +649,17 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): assert dataset["dataset_date"] == "06/04/2016" dataset["dataset_date"] = "02/26/2016" - dataset.remove_tag("conflict") + dataset.remove_tag("crisis-somewhere") dataset["id"] = "TEST1" dataset["name"] = "MyDataset1" dataset.update_in_hdx() assert dataset["id"] == "TEST1" assert dataset["dataset_date"] == "02/26/2016" - assert dataset.get_tags() == ["political violence"] + assert dataset.get_tags() == [ + "conflict", + "political violence", + "crisis-somewhere", + ] assert dataset["state"] == "active" pattern = ( r"HDXPythonLibrary/%s-test \([12]\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d\)" @@ -668,6 +672,9 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): "resource_name": "Resource1", "view_type": "hdx_hxl_preview", } + dataset.remove_tag("crisis-somewhere") + dataset.update_in_hdx(keep_crisis_tags=False) + assert dataset.get_tags() == ["conflict", "political violence"] dataset.preview_resourceview = ResourceView(resourceviewdata) dataset.update_in_hdx() assert dataset.preview_resourceview is None diff --git a/tests/hdx/data/test_dataset_noncore.py b/tests/hdx/data/test_dataset_noncore.py index 2843c63..93f64de 100755 --- a/tests/hdx/data/test_dataset_noncore.py +++ b/tests/hdx/data/test_dataset_noncore.py @@ -438,10 +438,18 @@ def test_get_add_tags(self, configuration, vocabulary_read): datasetdata = copy.deepcopy(dataset_data) dataset = Dataset(datasetdata) assert dataset["tags"] == resulttags - assert dataset.get_tags() == ["conflict", "political violence"] + assert dataset.get_tags() == [ + "conflict", + "political violence", + "crisis-somewhere", + ] dataset.add_tag("LALA") assert dataset["tags"] == resulttags - assert dataset.get_tags() == ["conflict", "political violence"] + assert dataset.get_tags() == [ + "conflict", + 
"political violence", + "crisis-somewhere", + ] dataset.add_tag("conflict") expected = copy.deepcopy(resulttags) expected.append( @@ -454,6 +462,7 @@ def test_get_add_tags(self, configuration, vocabulary_read): assert dataset.get_tags() == [ "conflict", "political violence", + "crisis-somewhere", "conflict-violence", ] dataset.add_tags( @@ -467,6 +476,7 @@ def test_get_add_tags(self, configuration, vocabulary_read): assert dataset.get_tags() == [ "conflict", "political violence", + "crisis-somewhere", "conflict-violence", "employment", "fatalities", @@ -475,6 +485,7 @@ def test_get_add_tags(self, configuration, vocabulary_read): assert dataset.get_tags() == [ "conflict", "political violence", + "crisis-somewhere", "employment", "fatalities", ] @@ -534,14 +545,19 @@ def test_add_clean_tags(self, configuration, vocabulary_read): Vocabulary.read_tags_mappings(failchained=False) datasetdata = copy.deepcopy(dataset_data) dataset = Dataset(datasetdata) - assert dataset.get_tags() == ["conflict", "political violence"] + assert dataset.get_tags() == [ + "conflict", + "political violence", + "crisis-somewhere", + ] assert dataset.clean_tags() == ( - ["conflict-violence"], + ["conflict-violence", "crisis-somewhere"], ["political violence"], ) dataset.add_tags(["nodeid123", "transportation"]) assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset["tags"].append( @@ -551,21 +567,28 @@ def test_add_clean_tags(self, configuration, vocabulary_read): } ) assert dataset.clean_tags() == ( - ["conflict-violence", "transportation"], + ["conflict-violence", "crisis-somewhere", "transportation"], ["nodeid123"], ) assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset.add_tags(["geodata", "points"]) assert dataset.clean_tags() == ( - ["conflict-violence", "transportation", "geodata"], + [ + "conflict-violence", + "crisis-somewhere", + "transportation", + "geodata", + ], [], ) dataset.add_tag("financial") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", "geodata", ] @@ -576,13 +599,19 @@ def test_add_clean_tags(self, configuration, vocabulary_read): } ) assert dataset.clean_tags() == ( - ["conflict-violence", "transportation", "geodata"], + [ + "conflict-violence", + "crisis-somewhere", + "transportation", + "geodata", + ], ["financial"], ) dataset.add_tag("addresses") assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", "geodata", ], @@ -591,12 +620,14 @@ def test_add_clean_tags(self, configuration, vocabulary_read): dataset.remove_tag("geodata") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset.add_tag("cultivos coca") assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", "livelihoods", ], @@ -606,6 +637,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): dataset.add_tag("atentados") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset["tags"].append( @@ -617,6 +649,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", ], [], @@ -625,6 +658,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", "cyclones-hurricanes-typhoons", ], @@ -635,6 +669,7 @@ def 
test_add_clean_tags(self, configuration, vocabulary_read): print(dataset.get_tags()) assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", "cyclones-hurricanes-typhoons", "affected area", @@ -643,6 +678,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): dataset.remove_tag("affected area") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "cyclones-hurricanes-typhoons", ] diff --git a/tests/hdx/data/test_vocabulary.py b/tests/hdx/data/test_vocabulary.py index af2e0c4..6ddf402 100755 --- a/tests/hdx/data/test_vocabulary.py +++ b/tests/hdx/data/test_vocabulary.py @@ -237,6 +237,13 @@ "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "display_name": "covid-19", }, + { + "id": "9dae41e5-eacd-4fa5-91df-8d80cf579e52", + "state": "active", + "display_name": "crisis-somewhere", + "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", + "name": "crisis-somewhere", + }, { "id": "326e097b-96f2-46e4-8ef4-0a8d4401a646", "name": "cyclones-hurricanes-typhoons",
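
Usage note (not part of the patch): a minimal sketch of how the keep_crisis_tags option introduced above is expected to behave, based on the kwargs handling added to Dataset._dataset_hdx_update and the updated tests. It assumes HDX credentials are already configured for Configuration.create; the hdx_site, user_agent, dataset name and tag value below are illustrative only.

    # Minimal sketch of the keep_crisis_tags behaviour added in this patch.
    # "my-dataset" and the site/user_agent values are hypothetical; the
    # keep_crisis_tags keyword argument is the one handled in
    # Dataset._dataset_hdx_update above (default True).
    from hdx.api.configuration import Configuration
    from hdx.data.dataset import Dataset

    Configuration.create(hdx_site="stage", user_agent="keep_crisis_tags_example")

    dataset = Dataset.read_from_hdx("my-dataset")  # hypothetical dataset name

    # Default (keep_crisis_tags=True): any "crisis-" prefixed tags already on
    # the dataset in HDX are re-added during update_in_hdx even if they are
    # missing from the local copy being uploaded.
    dataset.remove_tag("crisis-somewhere")
    dataset.update_in_hdx()
    # "crisis-somewhere" is still present on the dataset in HDX.

    # Opting out: with keep_crisis_tags=False, removing a crisis tag locally
    # removes it on HDX as well.
    dataset.remove_tag("crisis-somewhere")
    dataset.update_in_hdx(keep_crisis_tags=False)
    # "crisis-somewhere" is now gone from the dataset in HDX.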