Skip to content

Commit

Permalink
Check the GENERAL_QUALITY metadata to see if we can exit early
Browse files Browse the repository at this point in the history
  • Loading branch information
ceholden committed Jan 14, 2025
1 parent b531a6d commit 48b7ba6
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 21 deletions.
88 changes: 70 additions & 18 deletions apply_s2_quality_mask/apply_s2_quality_mask.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, TypeVar
from pathlib import Path

import click
import rasterio
from lxml import etree


# MSI band number definitions for bands used by HLS v2 product
# (coastal, blue/green/red, NIR, SWIR1, SWIR2)
SPECTRAL_BANDS = frozenset({
"B01",
"B02",
"B03",
"B04",
"B8A",
"B11",
"B12",
})
SPECTRAL_BANDS = frozenset(
{
"B01",
"B02",
"B03",
"B04",
"B8A",
"B11",
"B12",
}
)

# ensure compression is lossless to avoid changing values,
# https://gdal.org/en/stable/drivers/raster/jp2openjpeg.html#lossless-compression
Expand All @@ -26,12 +29,55 @@
}


def _one_or_none(paths: List[Path]) -> Optional[Path]:
if len(paths) == 1:
return paths[0]
T = TypeVar("T")


def _one_or_none(seq: List[T]) -> Optional[T]:
if len(seq) == 1:
return seq[0]


def find_affected_bands(granule_dir: Path) -> List[str]:
"""Check the granule GENERAL_QUALITY.xml for affected bands, if any"""
report_path = _one_or_none(list(granule_dir.glob("**/QI_DATA/GENERAL_QUALITY.xml")))

# We couldn't find the report, so check all quality masks
if report_path is None:
return list(SPECTRAL_BANDS)

quality_report = etree.parse(str(report_path))
root = quality_report.getroot()
# give default namespace a name for use in xpath
nsmap = {"qa": root.nsmap[None]}

# We have a XML structure like,
# <check>
# <inspection ... id="Data_Loss" status="FAILED">
# <message>There is data loss in this tile</message>
# <extraValues>
# <value name="Affected_Bands">B01 B02</value>
# </extraValues>
# </check>
#
# We want to grab the text from the "Affected_Bands" when the message
# indicates there is data loss in the tile.
data_loss_bands_txt = _one_or_none(
root.xpath(
(
".//qa:check[qa:message/text() = 'There is data loss in this tile']/"
"qa:extraValues/qa:value[@name = 'Affected_Bands']/text()"
),
namespaces=nsmap,
)
)
if data_loss_bands_txt is not None:
return data_loss_bands_txt.split(" ")
return []


def find_image_mask_pairs(granule_dir: Path) -> List[Tuple[Path, Path]]:
def find_image_mask_pairs(
granule_dir: Path, bands: List[str]
) -> List[Tuple[Path, Path]]:
"""Search granule directory for image + mask pairs
The quality masks were produced in an imagery format since baseline 04.00
Expand All @@ -57,7 +103,7 @@ def find_image_mask_pairs(granule_dir: Path) -> List[Tuple[Path, Path]]:
https://sentinels.copernicus.eu/web/sentinel/-/copernicus-sentinel-2-major-products-upgrade-upcoming
"""
pairs = []
for band in SPECTRAL_BANDS:
for band in bands:
image = _one_or_none(list(granule_dir.glob(f"**/IMG_DATA/*{band}.jp2")))
mask = _one_or_none(list(granule_dir.glob(f"**/QI_DATA/MSK_QUALIT_{band}.jp2")))
if image and mask:
Expand Down Expand Up @@ -119,13 +165,19 @@ def apply_quality_mask(image: Path, mask: Path):
"granule_dir",
type=click.Path(file_okay=False, dir_okay=True, exists=True),
)
def main(granule_dir: Path):
@click.pass_context
def main(ctx, granule_dir: Path):
"""Update Sentinel-2 imagery by masking lost or degraded pixels"""
granule_dir = Path(granule_dir)
click.echo(f"Applying Sentinel-2 QAQC mask to granule_dir={granule_dir}")

image_mask_pairs = find_image_mask_pairs(granule_dir)
affected_bands = find_affected_bands(granule_dir)
affected_hls_bands = SPECTRAL_BANDS.intersection(affected_bands)
if not affected_hls_bands:
click.echo(f"No bands are affected by data loss in {granule_dir}")
ctx.exit()

click.echo(f"Applying Sentinel-2 QAQC mask to granule_dir={granule_dir}")
image_mask_pairs = find_image_mask_pairs(granule_dir, affected_hls_bands)
for (image, mask) in image_mask_pairs:
apply_quality_mask(image, mask)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport">
<Earth_Explorer_Header>
<Fixed_Header>
<File_Name>GRANULE/L1C_T12QXM_A048143_20240909T175112/QI_DATA/GENERAL_QUALITY.xml</File_Name>
<File_Description>Report produced by OLQC-SC</File_Description>
<Notes/>
<Mission>S2A</Mission>
<File_Class>OPER</File_Class>
<File_Type>REP_OLQCPA</File_Type>
<Validity_Period>
<Validity_Start>UTC=2024-07-23T03:00:00</Validity_Start>
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop>
</Validity_Period>
<File_Version>2</File_Version>
<Source>
<System>OLQC-SC</System>
<Creator>OLQC-SC</Creator>
<Creator_Version>06.03.00</Creator_Version>
<Creation_Date>UTC=2024-09-10T00:44:41</Creation_Date>
</Source>
</Fixed_Header>
</Earth_Explorer_Header>
<Data_Block type="xml">
<report date="2024-09-10T00:44:41.058Z" gippVersion="1.0" globalStatus="PASSED">
<checkList>
<parentID>S2A_OPER_GIP_OLQCPA_MPC__20240717T000043_V20240723T030000</parentID>
<name>GENERAL_QUALITY</name>
<version>1.0</version>
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" url="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/"/>
<check>
<inspection creation="2024-09-10T00:44:40.633Z" duration="357" execution="2024-09-10T00:44:40.672Z" id="Solar_Angle_Zenith" item="S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Zenith solar angle (26.6116855171579) is within the threshold limit (82.0) </message>
<extraValues>
<value name="EXPECTED">82.0</value>
<value name="SOLAR_ZENITH_ANGLE">26.6116855171579</value>
</extraValues>
</check>
<check>
<inspection creation="2024-09-10T00:44:41.909Z" duration="16" execution="2024-09-10T00:44:41.914Z" id="checkMetadata" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Metadata file is present.</message>
</check>
<check>
<inspection creation="2024-09-10T00:44:43.339Z" duration="11692" execution="2024-09-10T00:44:43.350Z" id="Data_Loss" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">No data loss in this tile.</message>
</check>
</checkList>
</report>
</Data_Block>
</Earth_Explorer_File>
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport">
<Earth_Explorer_Header>
<Fixed_Header>
<File_Name>GRANULE/L1C_T43QDG_A031305_20230305T053523/QI_DATA/GENERAL_QUALITY.xml</File_Name>
<File_Description>Report produced by OLQC-SC</File_Description>
<Notes/>
<Mission>S2B</Mission>
<File_Class>OPER</File_Class>
<File_Type>REP_OLQCPA</File_Type>
<Validity_Period>
<Validity_Start>UTC=2022-08-30T00:25:00</Validity_Start>
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop>
</Validity_Period>
<File_Version>2</File_Version>
<Source>
<System>OLQC-SC</System>
<Creator>OLQC-SC</Creator>
<Creator_Version>06.01.00</Creator_Version>
<Creation_Date>UTC=2023-03-05T08:36:44</Creation_Date>
</Source>
</Fixed_Header>
</Earth_Explorer_Header>
<Data_Block type="xml">
<report date="2023-03-05T08:36:44.399Z" gippVersion="1.0" globalStatus="FAILED">
<checkList>
<parentID>S2B_OPER_GIP_OLQCPA_MPC__20220715T000042_V20220830T002500</parentID>
<name>GENERAL_QUALITY</name>
<version>1.0</version>
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" url="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/"/>
<check>
<inspection creation="2023-03-05T08:36:44.100Z" duration="246" execution="2023-03-05T08:36:44.145Z" id="checkMetadata" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Metadata file is present.</message>
</check>
<check>
<inspection creation="2023-03-05T08:36:45.387Z" duration="81" execution="2023-03-05T08:36:45.397Z" id="Solar_Angle_Zenith" item="S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Zenith solar angle (37.349671777297) is within the threshold limit (82.0) </message>
<extraValues>
<value name="EXPECTED">82.0</value>
<value name="SOLAR_ZENITH_ANGLE">37.349671777297</value>
</extraValues>
</check>
<check>
<inspection creation="2023-03-05T08:36:45.870Z" duration="23651" execution="2023-03-05T08:36:45.877Z" id="Data_Loss" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="FAILED"/>
<message contentType="text/plain">There is data loss in this tile</message>
<extraValues>
<value name="Affected_Bands">B01 B02 B03 B09 B10 B11 B12 B8A</value>
</extraValues>
</check>
</checkList>
</report>
</Data_Block>
</Earth_Explorer_File>
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport">
<Earth_Explorer_Header>
<Fixed_Header>
<File_Name>GRANULE/L1C_T45TXF_A038726_20221121T050115/QI_DATA/GENERAL_QUALITY.xml</File_Name>
<File_Description>Report produced by OLQC-SC</File_Description>
<Notes/>
<Mission>S2A</Mission>
<File_Class>OPER</File_Class>
<File_Type>REP_OLQCPA</File_Type>
<Validity_Period>
<Validity_Start>UTC=2022-08-30T00:25:00</Validity_Start>
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop>
</Validity_Period>
<File_Version>2</File_Version>
<Source>
<System>OLQC-SC</System>
<Creator>OLQC-SC</Creator>
<Creator_Version>05.01.00</Creator_Version>
<Creation_Date>UTC=2022-11-21T07:37:39</Creation_Date>
</Source>
</Fixed_Header>
</Earth_Explorer_Header>
<Data_Block type="xml">
<report date="2022-11-21T07:37:39.124Z" gippVersion="1.0" globalStatus="FAILED">
<checkList>
<parentID>S2A_OPER_GIP_OLQCPA_MPC__20220715T000042_V20220830T002500</parentID>
<name>GENERAL_QUALITY</name>
<version>1.0</version>
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2A_OPER_MSI_L1C_TL_ATOS_20221121T053331_A038726_T45TXF_N04.00" url="/mnt/nfs-l1-02/l1-processing/processing-id-17769-2022-11-21-06-03-12/TaskTable_20221121T060312/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_ATOS_20221121T053331_A038726_T45TXF_N04.00/"/>
<check>
<inspection creation="2022-11-21T07:37:38.889Z" duration="210" execution="2022-11-21T07:37:38.893Z" id="checkMetadata" item="S2A_OPER_MSI_L1C_TL_ATOS_20221121T053331_A038726_T45TXF_N04.00" itemURL="/mnt/nfs-l1-02/l1-processing/processing-id-17769-2022-11-21-06-03-12/TaskTable_20221121T060312/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_ATOS_20221121T053331_A038726_T45TXF_N04.00/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Metadata file is present.</message>
</check>
<check>
<inspection creation="2022-11-21T07:37:41.147Z" duration="281" execution="2022-11-21T07:37:41.188Z" id="Solar_Angle_Zenith" item="S2A_OPER_MTD_L1C_TL_ATOS_20221121T053331_A038726_T45TXF.xml" itemURL="/mnt/nfs-l1-02/l1-processing/processing-id-17769-2022-11-21-06-03-12/TaskTable_20221121T060312/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_ATOS_20221121T053331_A038726_T45TXF_N04.00/S2A_OPER_MTD_L1C_TL_ATOS_20221121T053331_A038726_T45TXF.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Zenith solar angle (61.7640691258715) is within the threshold limit (82.0) </message>
<extraValues>
<value name="EXPECTED">82.0</value>
<value name="SOLAR_ZENITH_ANGLE">61.7640691258715</value>
</extraValues>
</check>
<check>
<inspection creation="2022-11-21T07:37:42.488Z" duration="11940" execution="2022-11-21T07:37:42.495Z" id="Data_Loss" item="S2A_OPER_MSI_L1C_TL_ATOS_20221121T053331_A038726_T45TXF_N04.00" itemURL="/mnt/nfs-l1-02/l1-processing/processing-id-17769-2022-11-21-06-03-12/TaskTable_20221121T060312/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_ATOS_20221121T053331_A038726_T45TXF_N04.00/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="FAILED"/>
<message contentType="text/plain">There is data loss in this tile</message>
<extraValues>
<value name="Affected_Bands">B01 B02 B03 B04 B05 B06 B07 B08 B09 B10 B11 B12 B8A</value>
</extraValues>
</check>
</checkList>
</report>
</Data_Block>
</Earth_Explorer_File>
37 changes: 34 additions & 3 deletions tests/test_apply_s2_quality_mask.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Tuple
from typing import List, Tuple

import numpy as np
import pytest
Expand All @@ -11,9 +11,40 @@
JPEG2000_NO_COMPRESSION_OPTIONS,
SPECTRAL_BANDS,
apply_quality_mask,
find_affected_bands,
find_image_mask_pairs,
)

TEST_DATA = Path(__file__).parents[0].joinpath("data", "quality-mask")


@pytest.mark.parametrize(["granule_dir", "affected_bands"], [
pytest.param(
TEST_DATA / "L1C_T45TXF_A038726_20221121T050115",
["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B09", "B10", "B11", "B12", "B8A"],
id="All bands affected",
),
pytest.param(
TEST_DATA / "L1C_T43QDG_A031305_20230305T053523",
["B01", "B02", "B03", "B09", "B10", "B11", "B12", "B8A"],
id="Subset of bands affected",
),
pytest.param(
TEST_DATA / "L1C_T12QXM_A048143_20240909T175112",
[],
id="No bands affected",
),
pytest.param(
TEST_DATA,
SPECTRAL_BANDS,
id="No metadata file present",
)
])
def test_find_affected_bands(granule_dir: Path, affected_bands: List[str]):
"""Test we find the bands affected by a quality issue correctly from metadata"""
test = find_affected_bands(granule_dir)
assert set(test) == set(affected_bands)


def make_fake_s2_granule(
prefix: Path,
Expand Down Expand Up @@ -56,7 +87,7 @@ def test_find_image_mask_pairs_found_all(tmp_path: Path):
expected_images_masks.append(make_fake_s2_granule(granule_prefix, band, mask_data))

# Make sure we find all of the expected paths
image_mask_pairs = find_image_mask_pairs(tmp_path)
image_mask_pairs = find_image_mask_pairs(tmp_path, SPECTRAL_BANDS)

for image_mask_pair in expected_images_masks:
assert image_mask_pair in image_mask_pairs
Expand All @@ -74,7 +105,7 @@ def test_find_image_mask_pairs_found_not_all(tmp_path: Path):
expected_images_masks.append(make_fake_s2_granule(granule_prefix, band, mask_data))

# Make sure we find all of the expected paths
image_mask_pairs = find_image_mask_pairs(tmp_path)
image_mask_pairs = find_image_mask_pairs(tmp_path, SPECTRAL_BANDS)
assert not image_mask_pairs


Expand Down

0 comments on commit 48b7ba6

Please sign in to comment.