Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Update metadata to include HLS granule ID and links to Fmask layer #48

Merged
merged 9 commits into from
Jan 8, 2025
61 changes: 56 additions & 5 deletions hls_vi/generate_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re
import sys
from xml.dom import minidom

from datetime import datetime, timezone
from pathlib import Path
Expand Down Expand Up @@ -92,7 +93,13 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:
processing_time = tags["HLS_VI_PROCESSING_TIME"]

granule_ur = tree.find("GranuleUR")
input_granule_ur = granule_ur.text
granule_ur.text = granule_ur.text.replace("HLS", "HLS-VI")
set_additional_attribute(
tree.find("AdditionalAttributes"),
"Input_HLS_GranuleUR",
input_granule_ur,
)

time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
formatted_date = datetime.now(timezone.utc).strftime(time_format)
Expand Down Expand Up @@ -125,24 +132,35 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:

tree.find("DataFormat").text = "COG"

append_fmask_online_access_urls(
tree.find("OnlineAccessURLs"),
input_granule_ur,
)

with (
importlib_resources.files("hls_vi")
/ "schema"
/ "Granule.xsd" # pyright: ignore[reportOperatorIssue]
).open() as xsd:
ET.XMLSchema(file=xsd).assertValid(tree)

tree.write(
str(output_dir / metadata_path.name.replace("HLS", "HLS-VI")),
encoding="utf-8",
xml_declaration=True,
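# Python 3.9 (`xml.etree.ElementTree.indent()`) and `lxml>=4.5` (`lxml.etree.indent()`)
# each provide an `indent()` function that would nicely format our XML.
# Alas, we cannot use either of those yet, so we rely on this approach using
# `xml.dom.minidom` instead.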
dom = minidom.parseString(
ET.tostring(tree, xml_declaration=True, pretty_print=False)
)
pretty_xml = os.linesep.join(
[line for line in dom.toprettyxml(indent=" ").splitlines() if line.strip()]
)

dest = output_dir / metadata_path.name.replace("HLS", "HLS-VI")
dest.write_text(pretty_xml, encoding="utf-8")


def normalize_additional_attributes(container: ElementBase) -> None:
"""Normalize additional attribute values.

On rare occassions, granule data is split and recombined upstream. When this
On rare occasions, granule data is split and recombined upstream. When this
occurs, the associated metadata is also split and recombined, resulting in values
for additional attributes that are created by joining the separate parts with the
string `" + "`.
Expand Down Expand Up @@ -193,6 +211,39 @@ def set_additional_attribute(attrs: ElementBase, name: str, value: str) -> None:
attrs.append(attr)


def append_fmask_online_access_urls(
    access_urls: ElementBase, hls_granule_ur: str
) -> None:
    """Append links to the input HLS granule's Fmask layer.

    Two `OnlineAccessURL` children are appended to `access_urls`, in this
    order: an HTTPS link, then an S3 link. Each child holds a `URL` element
    followed by a `URLDescription` element, and both links point at
    `<hls_granule_ur>.Fmask.tif`.

    The links are meant to help users find the relevant Fmask band without
    it having to be duplicated into the HLS-VI product. See
    https://github.com/NASA-IMPACT/hls-vi/issues/47

    Args:
        access_urls: Element that receives the new `OnlineAccessURL` children.
        hls_granule_ur: Granule UR of the input HLS granule. A value starting
            with "HLS.L30" selects the "HLSL30.020" path prefix; any other
            value selects "HLSS30.020".
    """
    if hls_granule_ur.startswith("HLS.L30"):
        prefix = "HLSL30.020"
    else:
        prefix = "HLSS30.020"

    # (URL, description) pairs, listed in the order they are appended.
    links = (
        (
            f"https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/{prefix}/{hls_granule_ur}/{hls_granule_ur}.Fmask.tif",  # noqa: E501
            f"Download Fmask quality layer {hls_granule_ur}.Fmask.tif",
        ),
        (
            f"s3://lp-prod-protected/{prefix}/{hls_granule_ur}/{hls_granule_ur}.Fmask.tif",  # noqa: E501
            f"This link provides direct download access via S3 to the Fmask quality layer {hls_granule_ur}.Fmask.tif",  # noqa: E501
        ),
    )

    for href, description in links:
        entry = Element("OnlineAccessURL", None, None)

        url_node = Element("URL", None, None)
        url_node.text = href

        description_node = Element("URLDescription", None, None)
        description_node.text = description

        # Child order matters: URL first, then URLDescription.
        entry.append(url_node)
        entry.append(description_node)
        access_urls.append(entry)


def parse_args() -> Tuple[Path, Path]:
short_options = "i:o:"
long_options = ["instrument=", "inputdir=", "outputdir="]
Expand Down
3 changes: 2 additions & 1 deletion hls_vi/schema/Granule.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="GranuleDelete" type="GranuleDelete">
</xs:element>
<xs:element name="GranuleUR" type="GranuleUR"></xs:element>
<xs:element name="Input_GranuleUR" type="GranuleUR"></xs:element>
<xs:complexType name="GranuleMetaDataFile">
<xs:annotation>
<xs:documentation />
Expand Down Expand Up @@ -1298,7 +1299,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema">
type="ListOfAdditionalAttributeValues">
<xs:annotation>
<xs:documentation>The ordered list of values of the
additioanl attribute for this granule. The values will be
additional attribute for this granule. The values will be
kept in the order which they appear.</xs:documentation>
</xs:annotation>
</xs:element>
Expand Down
2 changes: 1 addition & 1 deletion hls_vi/schema/MetadataCommon.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@
</xs:sequence>
</xs:choice>
</xs:complexType>
<!-- #mark Emtpy Type -->
<!-- #mark Empty Type -->
<xs:complexType name="EmptyType">
<xs:annotation>
<xs:documentation>The element should contain no children. In
Expand Down
16 changes: 15 additions & 1 deletion tests/fixtures/HLS-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<DataGranule>
<ProducerGranuleId>HLS-VI.L30.T06WVS.2024120T211159</ProducerGranuleId>
<DayNightFlag>DAY</DayNightFlag>
<ProductionDateTime>UPDATE HLS Prodution DATETIME</ProductionDateTime>
<ProductionDateTime>UPDATE HLS Production DATETIME</ProductionDateTime>
<LocalVersionId>2.0</LocalVersionId>
</DataGranule>
<Temporal>
Expand Down Expand Up @@ -240,8 +240,22 @@
<Value>https://doi.org</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
<Name>Input_HLS_GranuleUR</Name>
<Values>
<Value>HLS.L30.T06WVS.2024120T211159.v2.0</Value>
</Values>
</AdditionalAttribute>
</AdditionalAttributes>
<OnlineAccessURLs>
<OnlineAccessURL>
<URL>https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URL>
<URLDescription>Download Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
<OnlineAccessURL>
<URL>s3://lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URL>
<URLDescription>This link provides direct download access via S3 to the Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
</OnlineAccessURLs>
<OnlineResources>
</OnlineResources>
Expand Down
16 changes: 15 additions & 1 deletion tests/fixtures/HLS-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<DataGranule>
<ProducerGranuleId>HLS-VI.S30.T13RCN.2024128T173909</ProducerGranuleId>
<DayNightFlag>DAY</DayNightFlag>
<ProductionDateTime>UPDATE HLS Prodution DATETIME</ProductionDateTime>
<ProductionDateTime>UPDATE HLS Production DATETIME</ProductionDateTime>
<LocalVersionId>2.0</LocalVersionId>
</DataGranule>
<Temporal>
Expand Down Expand Up @@ -302,8 +302,22 @@
<Value>https://doi.org</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
<Name>Input_HLS_GranuleUR</Name>
<Values>
<Value>HLS.S30.T13RCN.2024128T173909.v2.0</Value>
</Values>
</AdditionalAttribute>
</AdditionalAttributes>
<OnlineAccessURLs>
<OnlineAccessURL>
<URL>https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URL>
<URLDescription>Download Fmask quality layer HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
<OnlineAccessURL>
<URL>s3://lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URL>
<URLDescription>This link provides direct download access via S3 to the Fmask quality layer HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
</OnlineAccessURLs>
<OnlineResources>
</OnlineResources>
Expand Down