Skip to content

Commit

Permalink
HDXDSYS-683 Remove any timezone information in iso formatted data (#67)
Browse files Browse the repository at this point in the history
Fix for timezone incorrectly being in iso formatted strings
  • Loading branch information
mcarans authored Apr 26, 2024
1 parent e4cd53a commit ecd2d4c
Show file tree
Hide file tree
Showing 11 changed files with 124 additions and 63 deletions.
4 changes: 2 additions & 2 deletions .config/pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repos:
exclude: test_scraper_.*\.json
- id: check-ast
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
rev: v0.4.2
hooks:
# Run the linter.
- id: ruff
Expand All @@ -18,7 +18,7 @@ repos:
- id: ruff-format
args: [--config, .config/ruff.toml]
- repo: https://github.com/astral-sh/uv-pre-commit
rev: v0.1.24
rev: 0.1.38
hooks:
# Run the pip compile
- id: pip-compile
Expand Down
4 changes: 4 additions & 0 deletions documentation/main.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ The library has detailed API documentation which can be found in the menu at the


## Breaking Changes
From 6.2.8, mark_data_updated is fixed: it was broken by an error in
dataset_update_filestore_resource, which incorrectly added timezone
information to the ISO formatted last_modified string

From 6.2.7, generate_resource_from_iterator renamed to
generate_resource_from_iterable with requirement of iterable rather than iterator

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ dependencies = [
"defopt>=6.4.0",
"email_validator",
"hdx-python-country>=3.7.0",
"hdx-python-utilities>=3.6.7",
"hdx-python-utilities>=3.6.8",
"libhxl>=5.2.1",
"makefun",
"ndg-httpsclient",
Expand Down
28 changes: 14 additions & 14 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ charset-normalizer==3.3.2
ckanapi==4.8
click==8.1.7
# via typer
coverage==7.4.4
coverage==7.5.0
# via pytest-cov
cryptography==42.0.5
# via pyopenssl
Expand All @@ -35,7 +35,7 @@ docopt==0.6.2
# via
# ckanapi
# num2words
docutils==0.21.post1
docutils==0.21.2
# via defopt
email-validator==2.1.1
et-xmlfile==1.1.0
Expand All @@ -52,19 +52,19 @@ google-auth-oauthlib==1.2.0
# via gspread
gspread==6.1.0
hdx-python-country==3.7.0
hdx-python-utilities==3.6.7
hdx-python-utilities==3.6.8
# via hdx-python-country
humanize==4.9.0
# via frictionless
identify==2.5.35
identify==2.5.36
# via pre-commit
idna==3.6
idna==3.7
# via
# email-validator
# requests
ijson==3.2.3
# via hdx-python-utilities
inflect==7.2.0
inflect==7.2.1
# via quantulum3
iniconfig==2.0.0
# via pytest
Expand Down Expand Up @@ -108,9 +108,9 @@ packaging==24.0
# via pytest
petl==1.7.15
# via frictionless
platformdirs==4.2.0
platformdirs==4.2.1
# via virtualenv
pluggy==1.4.0
pluggy==1.5.0
# via pytest
ply==3.11
# via
Expand All @@ -128,9 +128,9 @@ pyasn1-modules==0.4.0
# via google-auth
pycparser==2.22
# via cffi
pydantic==2.6.4
pydantic==2.7.1
# via frictionless
pydantic-core==2.16.3
pydantic-core==2.18.2
# via pydantic
pygments==2.17.2
# via rich
Expand Down Expand Up @@ -159,7 +159,7 @@ pyyaml==6.0.1
# frictionless
# pre-commit
# tableschema-to-template
quantulum3==0.9.0
quantulum3==0.9.1
ratelimit==2.2.1
# via hdx-python-utilities
requests==2.31.0
Expand All @@ -183,7 +183,7 @@ ruamel-yaml==0.18.6
# via hdx-python-utilities
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
setuptools==69.2.0
setuptools==69.5.1
# via
# ckanapi
# nodeenv
Expand Down Expand Up @@ -234,9 +234,9 @@ urllib3==2.2.1
# via
# libhxl
# requests
validators==0.28.0
validators==0.28.1
# via frictionless
virtualenv==20.25.1
virtualenv==20.26.0
# via pre-commit
wheel==0.43.0
# via libhxl
Expand Down
7 changes: 4 additions & 3 deletions src/hdx/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
import warnings
from copy import deepcopy
from datetime import datetime, timezone
from datetime import datetime
from os.path import isfile, join
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -41,6 +41,7 @@
default_date,
default_enddate,
now_utc,
now_utc_notz,
parse_date,
parse_date_range,
)
Expand Down Expand Up @@ -598,9 +599,9 @@ def _prepare_hdx_call(self, data: Dict, kwargs: Any) -> None:
del kwargs["updated_by_script"]
else:
scriptinfo = self.configuration.get_user_agent()
# No need to output timezone info here
# Should not output timezone info here
data["updated_by_script"] = (
f"{scriptinfo} ({datetime.now(timezone.utc).replace(tzinfo=None).isoformat(timespec='microseconds')})"
f"{scriptinfo} ({now_utc_notz().isoformat(timespec='microseconds')})"
)
batch = kwargs.get("batch")
if batch:
Expand Down
10 changes: 6 additions & 4 deletions src/hdx/data/filestore_helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Helper to the Dataset class for handling resources with filestores."""

from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Dict

from hdx.utilities.dateparse import now_utc_notz

if TYPE_CHECKING:
from hdx.data.resource import Resource

Expand Down Expand Up @@ -90,7 +91,8 @@ def dataset_update_filestore_resource(

data_updated = resource_data_to_update.is_marked_data_updated()
if data_updated:
resource_data_to_update["last_modified"] = datetime.now(
timezone.utc
).isoformat(timespec="microseconds")
# Should not output timezone info here
resource_data_to_update["last_modified"] = (
now_utc_notz().isoformat(timespec="microseconds")
)
resource_data_to_update.data_updated = False
11 changes: 6 additions & 5 deletions src/hdx/data/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import warnings
from datetime import datetime, timezone
from datetime import datetime
from os.path import join
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
Expand All @@ -13,7 +13,7 @@
from hdx.data.date_helper import DateHelper
from hdx.data.hdxobject import HDXError, HDXObject
from hdx.data.resource_view import ResourceView
from hdx.utilities.dateparse import now_utc, parse_date
from hdx.utilities.dateparse import now_utc, now_utc_notz, parse_date
from hdx.utilities.downloader import Download
from hdx.utilities.typehint import ListTuple
from hdx.utilities.uuid import is_valid_uuid
Expand Down Expand Up @@ -393,9 +393,10 @@ def _resource_merge_hdx_update(
"""
data_updated = kwargs.pop("data_updated", self.data_updated)
if data_updated and not self.file_to_upload:
self.old_data["last_modified"] = datetime.now(
timezone.utc
).isoformat(timespec="microseconds")
# Should not output timezone info here
self.old_data["last_modified"] = now_utc_notz().isoformat(
timespec="microseconds"
)
self.data_updated = False
# old_data will be merged into data in the next step
self._merge_hdx_update(
Expand Down
10 changes: 10 additions & 0 deletions tests/hdx/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ def json(self):
},
]

# Minimal API-type resource metadata shared by tests that need a Resource
# fixture (for example the filestore helper tests).
resource_data = {
    "name": "MyResource1",
    "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d",
    "format": "xlsx",
    "url": "http://test/spreadsheet.xlsx",
    "description": "My Resource",
    "api_type": "api",
    "resource_type": "api",
}

organization_data = {
"name": "MyOrganization1",
"title": "Humanitarian Organization",
Expand Down
49 changes: 49 additions & 0 deletions tests/hdx/data/test_filestore_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import copy
import re

from . import resource_data
from hdx.data.filestore_helper import FilestoreHelper
from hdx.data.resource import Resource


class TestFilestoreHelper:
    """Tests for FilestoreHelper's handling of resources during dataset updates."""

    def test_dataset_update_filestore_resource(self, configuration):
        """Check dataset_update_filestore_resource across three stages.

        1. A resource with no file to upload is left untouched and nothing
           is registered in the filestore mapping.
        2. Once a file to upload is set, the url is replaced by a placeholder
           and the file is registered under the supplied index.
        3. Once the resource is marked as data updated, last_modified is set
           to an ISO formatted timestamp with microseconds and no timezone
           suffix.

        Args:
            configuration: Pytest fixture; not referenced directly in the
                test body.
        """
        resource = Resource(copy.deepcopy(resource_data))
        filestore_resources = {}
        expected = {
            "api_type": "api",
            "description": "My Resource",
            "format": "xlsx",
            "name": "MyResource1",
            "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d",
            "resource_type": "api",
            "url": "http://test/spreadsheet.xlsx",
        }

        # Stage 1: nothing to upload, so neither the resource nor the
        # filestore mapping should change.
        FilestoreHelper.dataset_update_filestore_resource(
            resource, filestore_resources, 0
        )
        assert resource == expected
        assert filestore_resources == {}

        # Stage 2: with a file to upload, the url becomes a placeholder and
        # the file is recorded against the resource index.
        resource.set_file_to_upload("test")
        FilestoreHelper.dataset_update_filestore_resource(
            resource, filestore_resources, 0
        )
        expected_after_upload = {
            **expected,
            "url": "updated_by_file_upload_step",
        }
        assert resource == expected_after_upload
        assert filestore_resources == {0: "test"}

        # Stage 3: marking data as updated must produce a naive ISO timestamp.
        # The dot is escaped and fullmatch is used so that any timezone
        # suffix, stray separator or trailing newline fails the test.
        resource.mark_data_updated()
        FilestoreHelper.dataset_update_filestore_resource(
            resource, filestore_resources, 0
        )
        regex = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}"
        assert re.fullmatch(regex, resource["last_modified"])
        assert filestore_resources == {0: "test"}
Loading

0 comments on commit ecd2d4c

Please sign in to comment.