From 671c5e10faeaf46e647c71fdf92cc6f39cb9deb8 Mon Sep 17 00:00:00 2001 From: Mike Date: Fri, 26 Jul 2024 11:28:30 +1200 Subject: [PATCH] HDXDSYS-914 Keep crisis tags on dataset update (#72) * Keep crisis tags * Keep crisis tags tests * Add keep_crisis_tags option which by default is True * Fix PR concurrent workflow runs causing failure --- .github/workflows/run-python-tests.yaml | 4 ++ pyproject.toml | 2 +- requirements.txt | 14 +++---- src/hdx/data/dataset.py | 15 ++++++++ tests/hdx/data/__init__.py | 7 ++++ tests/hdx/data/test_dataset_core.py | 11 +++++- tests/hdx/data/test_dataset_noncore.py | 50 +++++++++++++++++++++---- tests/hdx/data/test_vocabulary.py | 7 ++++ 8 files changed, 93 insertions(+), 17 deletions(-) diff --git a/.github/workflows/run-python-tests.yaml b/.github/workflows/run-python-tests.yaml index a3e36f0..8572447 100644 --- a/.github/workflows/run-python-tests.yaml +++ b/.github/workflows/run-python-tests.yaml @@ -13,6 +13,10 @@ on: branches-ignore: - gh-pages +concurrency: + group: ${{ github.head_ref || github.ref_name }} + + jobs: build: runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index bb780b1..4fe626a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "ckanapi>=4.8", "defopt>=6.4.0", "email_validator", - "hdx-python-country>=3.7.6", + "hdx-python-country>=3.7.7", "hdx-python-utilities>=3.7.2", "libhxl>=5.2.1", "makefun", diff --git a/requirements.txt b/requirements.txt index ac33997..4d7df47 100755 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ click==8.1.7 # via typer coverage==7.6.0 # via pytest-cov -cryptography==42.0.8 +cryptography==43.0.0 # via pyopenssl defopt==6.4.0 # via hdx-python-api (pyproject.toml) @@ -56,7 +56,7 @@ google-auth-oauthlib==1.2.1 # via gspread gspread==6.1.2 # via hdx-python-api (pyproject.toml) -hdx-python-country==3.7.6 +hdx-python-country==3.7.7 # via hdx-python-api (pyproject.toml) hdx-python-utilities==3.7.2 # via @@ -150,13 +150,13 @@ pydantic-core==2.20.1 # via pydantic pygments==2.18.0 # via rich -pyopenssl==24.1.0 +pyopenssl==24.2.1 # via # hdx-python-api (pyproject.toml) # ndg-httpsclient pyphonetics==0.5.3 # via hdx-python-country -pytest==8.2.2 +pytest==8.3.2 # via # hdx-python-api (pyproject.toml) # pytest-cov @@ -202,7 +202,7 @@ rfc3986==2.0.0 # via frictionless rich==13.7.1 # via typer -rpds-py==0.19.0 +rpds-py==0.19.1 # via # jsonschema # referencing @@ -212,7 +212,7 @@ ruamel-yaml==0.18.6 # via hdx-python-utilities ruamel-yaml-clib==0.2.8 # via ruamel-yaml -setuptools==70.3.0 +setuptools==71.1.0 # via ckanapi shellingham==1.5.4 # via typer @@ -231,7 +231,7 @@ sphinxcontrib-napoleon==0.7 # via defopt stringcase==1.2.0 # via frictionless -structlog==24.2.0 +structlog==24.4.0 # via libhxl tableschema-to-template==0.0.13 # via hdx-python-utilities diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 5013c27..85129f5 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -986,6 +986,19 @@ def _dataset_hdx_update( match_resource_order, **kwargs, ) + keep_crisis_tags = kwargs.get("keep_crisis_tags", True) + if keep_crisis_tags: + for tag in self.data["tags"]: + tag_name = tag["name"] + if tag_name[:7] != "crisis-": + continue + found = False + for old_tag in self.old_data["tags"]: + if old_tag["name"] == tag_name: + found = True + break + if not found: + self.old_data["tags"].append(tag) self._prepare_hdx_call(self.old_data, kwargs) return self._revise_dataset( keys_to_delete, @@ -1021,6 +1034,7 @@ def update_in_hdx( 
create_default_views (bool): Whether to call package_create_default_resource_views. Defaults to True. hxl_update (bool): Whether to call package_hxl_update. Defaults to True. **kwargs: See below + keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True. updated_by_script (str): String to identify your script. Defaults to your user agent. batch (str): A string you can specify to show which datasets are part of a single batch update @@ -1077,6 +1091,7 @@ def create_in_hdx( create_default_views (bool): Whether to call package_create_default_resource_views (if updating). Defaults to True. hxl_update (bool): Whether to call package_hxl_update. Defaults to True. **kwargs: See below + keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True. updated_by_script (str): String to identify your script. Defaults to your user agent. batch (str): A string you can specify to show which datasets are part of a single batch update diff --git a/tests/hdx/data/__init__.py b/tests/hdx/data/__init__.py index 7ab16e4..78bf898 100755 --- a/tests/hdx/data/__init__.py +++ b/tests/hdx/data/__init__.py @@ -45,6 +45,13 @@ def json(self): "id": "aaafc63b-2234-48e3-8ccc-198d7cf0f3f3", "name": "political violence", }, + { + "state": "active", + "display_name": "crisis-somewhere", + "vocabulary_id": "4381925f-0ae9-44a3-b30d-cae35598757b", + "id": "9dae41e5-eacd-4fa5-91df-8d80cf579e52", + "name": "crisis-somewhere", + }, ] dataset_data = { diff --git a/tests/hdx/data/test_dataset_core.py b/tests/hdx/data/test_dataset_core.py index 99fa42b..c6f7ec9 100755 --- a/tests/hdx/data/test_dataset_core.py +++ b/tests/hdx/data/test_dataset_core.py @@ -649,13 +649,17 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): assert dataset["dataset_date"] == "06/04/2016" dataset["dataset_date"] = "02/26/2016" - dataset.remove_tag("conflict") + dataset.remove_tag("crisis-somewhere") dataset["id"] = "TEST1" dataset["name"] = "MyDataset1" dataset.update_in_hdx() assert dataset["id"] == "TEST1" assert dataset["dataset_date"] == "02/26/2016" - assert dataset.get_tags() == ["political violence"] + assert dataset.get_tags() == [ + "conflict", + "political violence", + "crisis-somewhere", + ] assert dataset["state"] == "active" pattern = ( r"HDXPythonLibrary/%s-test \([12]\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d\)" @@ -668,6 +672,9 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): "resource_name": "Resource1", "view_type": "hdx_hxl_preview", } + dataset.remove_tag("crisis-somewhere") + dataset.update_in_hdx(keep_crisis_tags=False) + assert dataset.get_tags() == ["conflict", "political violence"] dataset.preview_resourceview = ResourceView(resourceviewdata) dataset.update_in_hdx() assert dataset.preview_resourceview is None diff --git a/tests/hdx/data/test_dataset_noncore.py b/tests/hdx/data/test_dataset_noncore.py index 2843c63..93f64de 100755 --- a/tests/hdx/data/test_dataset_noncore.py +++ b/tests/hdx/data/test_dataset_noncore.py @@ -438,10 +438,18 @@ def test_get_add_tags(self, configuration, vocabulary_read): datasetdata = copy.deepcopy(dataset_data) dataset = Dataset(datasetdata) assert dataset["tags"] == resulttags - assert dataset.get_tags() == ["conflict", "political violence"] + assert dataset.get_tags() == [ + "conflict", + "political violence", + "crisis-somewhere", + ] dataset.add_tag("LALA") assert dataset["tags"] == resulttags - assert dataset.get_tags() == ["conflict", "political violence"] + assert dataset.get_tags() == [ + "conflict", + 
"political violence", + "crisis-somewhere", + ] dataset.add_tag("conflict") expected = copy.deepcopy(resulttags) expected.append( @@ -454,6 +462,7 @@ def test_get_add_tags(self, configuration, vocabulary_read): assert dataset.get_tags() == [ "conflict", "political violence", + "crisis-somewhere", "conflict-violence", ] dataset.add_tags( @@ -467,6 +476,7 @@ def test_get_add_tags(self, configuration, vocabulary_read): assert dataset.get_tags() == [ "conflict", "political violence", + "crisis-somewhere", "conflict-violence", "employment", "fatalities", @@ -475,6 +485,7 @@ def test_get_add_tags(self, configuration, vocabulary_read): assert dataset.get_tags() == [ "conflict", "political violence", + "crisis-somewhere", "employment", "fatalities", ] @@ -534,14 +545,19 @@ def test_add_clean_tags(self, configuration, vocabulary_read): Vocabulary.read_tags_mappings(failchained=False) datasetdata = copy.deepcopy(dataset_data) dataset = Dataset(datasetdata) - assert dataset.get_tags() == ["conflict", "political violence"] + assert dataset.get_tags() == [ + "conflict", + "political violence", + "crisis-somewhere", + ] assert dataset.clean_tags() == ( - ["conflict-violence"], + ["conflict-violence", "crisis-somewhere"], ["political violence"], ) dataset.add_tags(["nodeid123", "transportation"]) assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset["tags"].append( @@ -551,21 +567,28 @@ def test_add_clean_tags(self, configuration, vocabulary_read): } ) assert dataset.clean_tags() == ( - ["conflict-violence", "transportation"], + ["conflict-violence", "crisis-somewhere", "transportation"], ["nodeid123"], ) assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset.add_tags(["geodata", "points"]) assert dataset.clean_tags() == ( - ["conflict-violence", "transportation", "geodata"], + [ + "conflict-violence", + "crisis-somewhere", + "transportation", + "geodata", + ], [], ) dataset.add_tag("financial") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", "geodata", ] @@ -576,13 +599,19 @@ def test_add_clean_tags(self, configuration, vocabulary_read): } ) assert dataset.clean_tags() == ( - ["conflict-violence", "transportation", "geodata"], + [ + "conflict-violence", + "crisis-somewhere", + "transportation", + "geodata", + ], ["financial"], ) dataset.add_tag("addresses") assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", "geodata", ], @@ -591,12 +620,14 @@ def test_add_clean_tags(self, configuration, vocabulary_read): dataset.remove_tag("geodata") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset.add_tag("cultivos coca") assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", "livelihoods", ], @@ -606,6 +637,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): dataset.add_tag("atentados") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", ] dataset["tags"].append( @@ -617,6 +649,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", ], [], @@ -625,6 +658,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): assert dataset.clean_tags() == ( [ "conflict-violence", + "crisis-somewhere", "transportation", "cyclones-hurricanes-typhoons", ], @@ -635,6 +669,7 @@ def 
test_add_clean_tags(self, configuration, vocabulary_read): print(dataset.get_tags()) assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "transportation", "cyclones-hurricanes-typhoons", "affected area", @@ -643,6 +678,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read): dataset.remove_tag("affected area") assert dataset.get_tags() == [ "conflict-violence", + "crisis-somewhere", "cyclones-hurricanes-typhoons", ] diff --git a/tests/hdx/data/test_vocabulary.py b/tests/hdx/data/test_vocabulary.py index af2e0c4..6ddf402 100755 --- a/tests/hdx/data/test_vocabulary.py +++ b/tests/hdx/data/test_vocabulary.py @@ -237,6 +237,13 @@ "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "display_name": "covid-19", }, + { + "id": "9dae41e5-eacd-4fa5-91df-8d80cf579e52", + "state": "active", + "display_name": "crisis-somewhere", + "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", + "name": "crisis-somewhere", + }, { "id": "326e097b-96f2-46e4-8ef4-0a8d4401a646", "name": "cyclones-hurricanes-typhoons",
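
Usage note (not part of the patch): a minimal sketch of how the keep_crisis_tags option introduced above is expected to behave, based on the kwargs handling added to Dataset._dataset_hdx_update and the updated tests. It assumes HDX credentials are already configured for Configuration.create; the hdx_site, user_agent, dataset name and tag value below are illustrative only.

    # Minimal sketch of the keep_crisis_tags behaviour added in this patch.
    # "my-dataset" and the site/user_agent values are hypothetical; the
    # keep_crisis_tags keyword argument is the one handled in
    # Dataset._dataset_hdx_update above (default True).
    from hdx.api.configuration import Configuration
    from hdx.data.dataset import Dataset

    Configuration.create(hdx_site="stage", user_agent="keep_crisis_tags_example")

    dataset = Dataset.read_from_hdx("my-dataset")  # hypothetical dataset name

    # Default (keep_crisis_tags=True): any "crisis-" prefixed tags already on
    # the dataset in HDX are re-added during update_in_hdx even if they are
    # missing from the local copy being uploaded.
    dataset.remove_tag("crisis-somewhere")
    dataset.update_in_hdx()
    # "crisis-somewhere" is still present on the dataset in HDX.

    # Opting out: with keep_crisis_tags=False, removing a crisis tag locally
    # removes it on HDX as well.
    dataset.remove_tag("crisis-somewhere")
    dataset.update_in_hdx(keep_crisis_tags=False)
    # "crisis-somewhere" is now gone from the dataset in HDX.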