Skip to content

Commit

Permalink
HDXDSYS-914 Keep crisis tags on dataset update (#72)
Browse files Browse the repository at this point in the history
* Keep crisis tags

* Add tests for keeping crisis tags

* Add keep_crisis_tags option, which defaults to True

* Fix failures caused by concurrent PR workflow runs
  • Loading branch information
mcarans authored Jul 25, 2024
1 parent a0a4345 commit 671c5e1
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 17 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/run-python-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
branches-ignore:
- gh-pages

concurrency:
group: ${{ github.head_ref || github.ref_name }}


jobs:
build:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dependencies = [
"ckanapi>=4.8",
"defopt>=6.4.0",
"email_validator",
"hdx-python-country>=3.7.6",
"hdx-python-country>=3.7.7",
"hdx-python-utilities>=3.7.2",
"libhxl>=5.2.1",
"makefun",
Expand Down
14 changes: 7 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ click==8.1.7
# via typer
coverage==7.6.0
# via pytest-cov
cryptography==42.0.8
cryptography==43.0.0
# via pyopenssl
defopt==6.4.0
# via hdx-python-api (pyproject.toml)
Expand Down Expand Up @@ -56,7 +56,7 @@ google-auth-oauthlib==1.2.1
# via gspread
gspread==6.1.2
# via hdx-python-api (pyproject.toml)
hdx-python-country==3.7.6
hdx-python-country==3.7.7
# via hdx-python-api (pyproject.toml)
hdx-python-utilities==3.7.2
# via
Expand Down Expand Up @@ -150,13 +150,13 @@ pydantic-core==2.20.1
# via pydantic
pygments==2.18.0
# via rich
pyopenssl==24.1.0
pyopenssl==24.2.1
# via
# hdx-python-api (pyproject.toml)
# ndg-httpsclient
pyphonetics==0.5.3
# via hdx-python-country
pytest==8.2.2
pytest==8.3.2
# via
# hdx-python-api (pyproject.toml)
# pytest-cov
Expand Down Expand Up @@ -202,7 +202,7 @@ rfc3986==2.0.0
# via frictionless
rich==13.7.1
# via typer
rpds-py==0.19.0
rpds-py==0.19.1
# via
# jsonschema
# referencing
Expand All @@ -212,7 +212,7 @@ ruamel-yaml==0.18.6
# via hdx-python-utilities
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
setuptools==70.3.0
setuptools==71.1.0
# via ckanapi
shellingham==1.5.4
# via typer
Expand All @@ -231,7 +231,7 @@ sphinxcontrib-napoleon==0.7
# via defopt
stringcase==1.2.0
# via frictionless
structlog==24.2.0
structlog==24.4.0
# via libhxl
tableschema-to-template==0.0.13
# via hdx-python-utilities
Expand Down
15 changes: 15 additions & 0 deletions src/hdx/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,19 @@ def _dataset_hdx_update(
match_resource_order,
**kwargs,
)
keep_crisis_tags = kwargs.get("keep_crisis_tags", True)
if keep_crisis_tags:
for tag in self.data["tags"]:
tag_name = tag["name"]
if tag_name[:7] != "crisis-":
continue
found = False
for old_tag in self.old_data["tags"]:
if old_tag["name"] == tag_name:
found = True
break
if not found:
self.old_data["tags"].append(tag)
self._prepare_hdx_call(self.old_data, kwargs)
return self._revise_dataset(
keys_to_delete,
Expand Down Expand Up @@ -1021,6 +1034,7 @@ def update_in_hdx(
create_default_views (bool): Whether to call package_create_default_resource_views. Defaults to True.
hxl_update (bool): Whether to call package_hxl_update. Defaults to True.
**kwargs: See below
keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True.
updated_by_script (str): String to identify your script. Defaults to your user agent.
batch (str): A string you can specify to show which datasets are part of a single batch update
Expand Down Expand Up @@ -1077,6 +1091,7 @@ def create_in_hdx(
create_default_views (bool): Whether to call package_create_default_resource_views (if updating). Defaults to True.
hxl_update (bool): Whether to call package_hxl_update. Defaults to True.
**kwargs: See below
keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True.
updated_by_script (str): String to identify your script. Defaults to your user agent.
batch (str): A string you can specify to show which datasets are part of a single batch update
Expand Down
7 changes: 7 additions & 0 deletions tests/hdx/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ def json(self):
"id": "aaafc63b-2234-48e3-8ccc-198d7cf0f3f3",
"name": "political violence",
},
{
"state": "active",
"display_name": "crisis-somewhere",
"vocabulary_id": "4381925f-0ae9-44a3-b30d-cae35598757b",
"id": "9dae41e5-eacd-4fa5-91df-8d80cf579e52",
"name": "crisis-somewhere",
},
]

dataset_data = {
Expand Down
11 changes: 9 additions & 2 deletions tests/hdx/data/test_dataset_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,13 +649,17 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern):
assert dataset["dataset_date"] == "06/04/2016"

dataset["dataset_date"] = "02/26/2016"
dataset.remove_tag("conflict")
dataset.remove_tag("crisis-somewhere")
dataset["id"] = "TEST1"
dataset["name"] = "MyDataset1"
dataset.update_in_hdx()
assert dataset["id"] == "TEST1"
assert dataset["dataset_date"] == "02/26/2016"
assert dataset.get_tags() == ["political violence"]
assert dataset.get_tags() == [
"conflict",
"political violence",
"crisis-somewhere",
]
assert dataset["state"] == "active"
pattern = (
r"HDXPythonLibrary/%s-test \([12]\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d\)"
Expand All @@ -668,6 +672,9 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern):
"resource_name": "Resource1",
"view_type": "hdx_hxl_preview",
}
dataset.remove_tag("crisis-somewhere")
dataset.update_in_hdx(keep_crisis_tags=False)
assert dataset.get_tags() == ["conflict", "political violence"]
dataset.preview_resourceview = ResourceView(resourceviewdata)
dataset.update_in_hdx()
assert dataset.preview_resourceview is None
Expand Down
50 changes: 43 additions & 7 deletions tests/hdx/data/test_dataset_noncore.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,10 +438,18 @@ def test_get_add_tags(self, configuration, vocabulary_read):
datasetdata = copy.deepcopy(dataset_data)
dataset = Dataset(datasetdata)
assert dataset["tags"] == resulttags
assert dataset.get_tags() == ["conflict", "political violence"]
assert dataset.get_tags() == [
"conflict",
"political violence",
"crisis-somewhere",
]
dataset.add_tag("LALA")
assert dataset["tags"] == resulttags
assert dataset.get_tags() == ["conflict", "political violence"]
assert dataset.get_tags() == [
"conflict",
"political violence",
"crisis-somewhere",
]
dataset.add_tag("conflict")
expected = copy.deepcopy(resulttags)
expected.append(
Expand All @@ -454,6 +462,7 @@ def test_get_add_tags(self, configuration, vocabulary_read):
assert dataset.get_tags() == [
"conflict",
"political violence",
"crisis-somewhere",
"conflict-violence",
]
dataset.add_tags(
Expand All @@ -467,6 +476,7 @@ def test_get_add_tags(self, configuration, vocabulary_read):
assert dataset.get_tags() == [
"conflict",
"political violence",
"crisis-somewhere",
"conflict-violence",
"employment",
"fatalities",
Expand All @@ -475,6 +485,7 @@ def test_get_add_tags(self, configuration, vocabulary_read):
assert dataset.get_tags() == [
"conflict",
"political violence",
"crisis-somewhere",
"employment",
"fatalities",
]
Expand Down Expand Up @@ -534,14 +545,19 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
Vocabulary.read_tags_mappings(failchained=False)
datasetdata = copy.deepcopy(dataset_data)
dataset = Dataset(datasetdata)
assert dataset.get_tags() == ["conflict", "political violence"]
assert dataset.get_tags() == [
"conflict",
"political violence",
"crisis-somewhere",
]
assert dataset.clean_tags() == (
["conflict-violence"],
["conflict-violence", "crisis-somewhere"],
["political violence"],
)
dataset.add_tags(["nodeid123", "transportation"])
assert dataset.get_tags() == [
"conflict-violence",
"crisis-somewhere",
"transportation",
]
dataset["tags"].append(
Expand All @@ -551,21 +567,28 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
}
)
assert dataset.clean_tags() == (
["conflict-violence", "transportation"],
["conflict-violence", "crisis-somewhere", "transportation"],
["nodeid123"],
)
assert dataset.get_tags() == [
"conflict-violence",
"crisis-somewhere",
"transportation",
]
dataset.add_tags(["geodata", "points"])
assert dataset.clean_tags() == (
["conflict-violence", "transportation", "geodata"],
[
"conflict-violence",
"crisis-somewhere",
"transportation",
"geodata",
],
[],
)
dataset.add_tag("financial")
assert dataset.get_tags() == [
"conflict-violence",
"crisis-somewhere",
"transportation",
"geodata",
]
Expand All @@ -576,13 +599,19 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
}
)
assert dataset.clean_tags() == (
["conflict-violence", "transportation", "geodata"],
[
"conflict-violence",
"crisis-somewhere",
"transportation",
"geodata",
],
["financial"],
)
dataset.add_tag("addresses")
assert dataset.clean_tags() == (
[
"conflict-violence",
"crisis-somewhere",
"transportation",
"geodata",
],
Expand All @@ -591,12 +620,14 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
dataset.remove_tag("geodata")
assert dataset.get_tags() == [
"conflict-violence",
"crisis-somewhere",
"transportation",
]
dataset.add_tag("cultivos coca")
assert dataset.clean_tags() == (
[
"conflict-violence",
"crisis-somewhere",
"transportation",
"livelihoods",
],
Expand All @@ -606,6 +637,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
dataset.add_tag("atentados")
assert dataset.get_tags() == [
"conflict-violence",
"crisis-somewhere",
"transportation",
]
dataset["tags"].append(
Expand All @@ -617,6 +649,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
assert dataset.clean_tags() == (
[
"conflict-violence",
"crisis-somewhere",
"transportation",
],
[],
Expand All @@ -625,6 +658,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
assert dataset.clean_tags() == (
[
"conflict-violence",
"crisis-somewhere",
"transportation",
"cyclones-hurricanes-typhoons",
],
Expand All @@ -635,6 +669,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
print(dataset.get_tags())
assert dataset.get_tags() == [
"conflict-violence",
"crisis-somewhere",
"transportation",
"cyclones-hurricanes-typhoons",
"affected area",
Expand All @@ -643,6 +678,7 @@ def test_add_clean_tags(self, configuration, vocabulary_read):
dataset.remove_tag("affected area")
assert dataset.get_tags() == [
"conflict-violence",
"crisis-somewhere",
"cyclones-hurricanes-typhoons",
]

Expand Down
7 changes: 7 additions & 0 deletions tests/hdx/data/test_vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,13 @@
"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1",
"display_name": "covid-19",
},
{
"id": "9dae41e5-eacd-4fa5-91df-8d80cf579e52",
"state": "active",
"display_name": "crisis-somewhere",
"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1",
"name": "crisis-somewhere",
},
{
"id": "326e097b-96f2-46e4-8ef4-0a8d4401a646",
"name": "cyclones-hurricanes-typhoons",
Expand Down

0 comments on commit 671c5e1

Please sign in to comment.