Skip to content

Commit

Permalink
feat: add script to apply ESA quality mask (#15)
Browse files Browse the repository at this point in the history
* Add CLI to apply per-band QA mask for lost or degraded pixel values

* Update list of spectral bands to subset used by HLS

* Add unit tests and fix JPEG2000 compression on update

* Update README with apply_s2_quality_mask usage

* Check the GENERAL_QUALITY metadata to see if we can exit early

* granule_dir is actually a str in click==7.x
  • Loading branch information
ceholden authored Jan 27, 2025
1 parent 7eeda8e commit 883f585
Show file tree
Hide file tree
Showing 8 changed files with 488 additions and 1 deletion.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

## Usage
```bash
$ apply_s2_quality_mask INPUTS2DIR
```
```bash
$ check_solar_zenith_sentinel INPUTXML
```
```bash
Expand Down
Empty file.
188 changes: 188 additions & 0 deletions apply_s2_quality_mask/apply_s2_quality_mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
from typing import List, Optional, Tuple, TypeVar
from pathlib import Path

import click
import rasterio
from lxml import etree


# MSI band number definitions for bands used by HLS v2 product
# (coastal, blue/green/red, NIR, SWIR1, SWIR2)
SPECTRAL_BANDS = frozenset(
{
"B01",
"B02",
"B03",
"B04",
"B8A",
"B11",
"B12",
}
)

# ensure compression is lossless to avoid changing values,
# https://gdal.org/en/stable/drivers/raster/jp2openjpeg.html#lossless-compression
JPEG2000_NO_COMPRESSION_OPTIONS = {
"QUALITY": "100",
"REVERSIBLE": "YES",
"YCBCR420": "NO",
}


T = TypeVar("T")


def _one_or_none(seq: List[T]) -> Optional[T]:
if len(seq) == 1:
return seq[0]


def find_affected_bands(granule_dir: Path) -> List[str]:
"""Check the granule GENERAL_QUALITY.xml for affected bands, if any"""
report_path = _one_or_none(list(granule_dir.glob("**/QI_DATA/GENERAL_QUALITY.xml")))

# We couldn't find the report, so check all quality masks
if report_path is None:
return list(SPECTRAL_BANDS)

quality_report = etree.parse(str(report_path))
root = quality_report.getroot()
# give default namespace a name for use in xpath
nsmap = {"qa": root.nsmap[None]}

# We have a XML structure like,
# <check>
# <inspection ... id="Data_Loss" status="FAILED">
# <message>There is data loss in this tile</message>
# <extraValues>
# <value name="Affected_Bands">B01 B02</value>
# </extraValues>
# </check>
#
# We want to grab the text from the "Affected_Bands" when the message
# indicates there is data loss in the tile.
data_loss_bands_txt = _one_or_none(
root.xpath(
(
".//qa:check[qa:message/text() = 'There is data loss in this tile']/"
"qa:extraValues/qa:value[@name = 'Affected_Bands']/text()"
),
namespaces=nsmap,
)
)
if data_loss_bands_txt is not None:
return data_loss_bands_txt.split(" ")
return []


def find_image_mask_pairs(
granule_dir: Path, bands: List[str]
) -> List[Tuple[Path, Path]]:
"""Search granule directory for image + mask pairs
The quality masks were produced in an imagery format since baseline 04.00
(~Jan 25, 2022 and onward), so this function might return nothing
if run on granules created with older baselines. These older baselines
encoded the mask quality information in the GML (vector) format.
The relevant parts of an unzipped granule looks like,
```
${GRANULE_ID}.SAFE
${GRANULE_ID}.SAFE/GRANULE
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/IMG_DATA
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/IMG_DATA/T45TXF_20221121T050121_B01.jp2
... (*B*.jp2)
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/QI_DATA
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/QI_DATA/MSK_QUALIT_B01.jp2
... (MSK_QUALIT_B*.jp2)
```
References
----------
https://sentinels.copernicus.eu/web/sentinel/-/copernicus-sentinel-2-major-products-upgrade-upcoming
"""
pairs = []
for band in bands:
image = _one_or_none(list(granule_dir.glob(f"**/IMG_DATA/*{band}.jp2")))
mask = _one_or_none(list(granule_dir.glob(f"**/QI_DATA/MSK_QUALIT_{band}.jp2")))
if image and mask:
pairs.append((image, mask))

# Find all bands, or none
if len(pairs) == len(SPECTRAL_BANDS):
return pairs
return []


def apply_quality_mask(image: Path, mask: Path):
"""Apply Sentinel-2 image quality mask
Each spectral band (`IMG_DATA/B*.jp2`) has a corresponding quality mask
image (`QI_DATA/MSK_QUALIT_B*.jp2`) of the same spatial resolution. The mask
image has 8 bands, with each band encoding a boolean for the presence/absence
of a quality issue. The bands indicate,
1: lost ancillary packets
2: degraded ancillary packets
3: lost MSI packets
4: degraded MSI packets
5: defective pixels
6: no data
7: partially corrected cross talk
8: saturated pixels
We have decided to mask 3 (lost MSI packets) and 4 (degraded MSI packets) only.
So far, 5 and 7 are present in B10-B11, because interpolation is used to fill 5
and the cross talk is partially corrected.
Saturated pixels are still useful for the magnitude of the reflectance; keep it.
We apply the mask by updating the spectral data images in-place rather than
creating another file.
References
----------
https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/data-quality-reports
"""
with rasterio.open(mask) as mask_src:
qa_bands = mask_src.read(indexes=[3, 4])
lost_degraded_mask = (qa_bands == 1).any(axis=0)

# only update imagery if mask shows it has any bad pixels
if lost_degraded_mask.any():
click.echo(f"Masking lost or degraded pixel values in {image}")
with rasterio.open(image, "r+", **JPEG2000_NO_COMPRESSION_OPTIONS) as img_src:
img = img_src.read(1)
# L1C images don't define the nodata value on file so we can't update the
# mask (e.g., via `write_mask`) but 0 is used as the nodata value
# (see SPECIAL_VALUE_INDEX for NODATA in metadata)
img[lost_degraded_mask] = 0
img_src.write(img, 1)


@click.command()
@click.argument(
"granule_dir",
type=click.Path(file_okay=False, dir_okay=True, exists=True),
)
@click.pass_context
def main(ctx, granule_dir: str):
"""Update Sentinel-2 imagery by masking lost or degraded pixels"""
granule_dir = Path(granule_dir)

affected_bands = find_affected_bands(granule_dir)
affected_hls_bands = SPECTRAL_BANDS.intersection(affected_bands)
if not affected_hls_bands:
click.echo(f"No bands are affected by data loss in {granule_dir}")
ctx.exit()

click.echo(f"Applying Sentinel-2 QAQC mask to granule_dir={granule_dir}")
image_mask_pairs = find_image_mask_pairs(granule_dir, affected_hls_bands)
for (image, mask) in image_mask_pairs:
apply_quality_mask(image, mask)

click.echo("Complete")


if __name__ == "__main__":
main()
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
"click~=7.1.0",
"lxml",
"boto3~=1.17.91",
"espa-python-library @ git+https://github.com/USGS-EROS/[email protected]#egg=espa-python-library"
"espa-python-library @ git+https://github.com/USGS-EROS/[email protected]#egg=espa-python-library",
"rasterio~=1.2",
"numpy~=1.16",
],
include_package_data=True,
extras_require={
"dev": ["flake8", "black"],
"test": ["flake8", "pytest", "Jinja2==2.10.1", "moto[s3]~=2.0.8"]
},
entry_points={"console_scripts": [
"apply_s2_quality_mask=apply_s2_quality_mask.apply_s2_quality_mask:main",
"parse_fmask=parse_fmask.parse_fmask:main",
"check_solar_zenith_sentinel=check_solar_zenith_sentinel.check_solar_zenith_sentinel:main",
"check_solar_zenith_landsat=check_solar_zenith_landsat.check_solar_zenith_landsat:main",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport">
<Earth_Explorer_Header>
<Fixed_Header>
<File_Name>GRANULE/L1C_T12QXM_A048143_20240909T175112/QI_DATA/GENERAL_QUALITY.xml</File_Name>
<File_Description>Report produced by OLQC-SC</File_Description>
<Notes/>
<Mission>S2A</Mission>
<File_Class>OPER</File_Class>
<File_Type>REP_OLQCPA</File_Type>
<Validity_Period>
<Validity_Start>UTC=2024-07-23T03:00:00</Validity_Start>
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop>
</Validity_Period>
<File_Version>2</File_Version>
<Source>
<System>OLQC-SC</System>
<Creator>OLQC-SC</Creator>
<Creator_Version>06.03.00</Creator_Version>
<Creation_Date>UTC=2024-09-10T00:44:41</Creation_Date>
</Source>
</Fixed_Header>
</Earth_Explorer_Header>
<Data_Block type="xml">
<report date="2024-09-10T00:44:41.058Z" gippVersion="1.0" globalStatus="PASSED">
<checkList>
<parentID>S2A_OPER_GIP_OLQCPA_MPC__20240717T000043_V20240723T030000</parentID>
<name>GENERAL_QUALITY</name>
<version>1.0</version>
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" url="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/"/>
<check>
<inspection creation="2024-09-10T00:44:40.633Z" duration="357" execution="2024-09-10T00:44:40.672Z" id="Solar_Angle_Zenith" item="S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Zenith solar angle (26.6116855171579) is within the threshold limit (82.0) </message>
<extraValues>
<value name="EXPECTED">82.0</value>
<value name="SOLAR_ZENITH_ANGLE">26.6116855171579</value>
</extraValues>
</check>
<check>
<inspection creation="2024-09-10T00:44:41.909Z" duration="16" execution="2024-09-10T00:44:41.914Z" id="checkMetadata" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Metadata file is present.</message>
</check>
<check>
<inspection creation="2024-09-10T00:44:43.339Z" duration="11692" execution="2024-09-10T00:44:43.350Z" id="Data_Loss" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">No data loss in this tile.</message>
</check>
</checkList>
</report>
</Data_Block>
</Earth_Explorer_File>
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport">
<Earth_Explorer_Header>
<Fixed_Header>
<File_Name>GRANULE/L1C_T43QDG_A031305_20230305T053523/QI_DATA/GENERAL_QUALITY.xml</File_Name>
<File_Description>Report produced by OLQC-SC</File_Description>
<Notes/>
<Mission>S2B</Mission>
<File_Class>OPER</File_Class>
<File_Type>REP_OLQCPA</File_Type>
<Validity_Period>
<Validity_Start>UTC=2022-08-30T00:25:00</Validity_Start>
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop>
</Validity_Period>
<File_Version>2</File_Version>
<Source>
<System>OLQC-SC</System>
<Creator>OLQC-SC</Creator>
<Creator_Version>06.01.00</Creator_Version>
<Creation_Date>UTC=2023-03-05T08:36:44</Creation_Date>
</Source>
</Fixed_Header>
</Earth_Explorer_Header>
<Data_Block type="xml">
<report date="2023-03-05T08:36:44.399Z" gippVersion="1.0" globalStatus="FAILED">
<checkList>
<parentID>S2B_OPER_GIP_OLQCPA_MPC__20220715T000042_V20220830T002500</parentID>
<name>GENERAL_QUALITY</name>
<version>1.0</version>
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" url="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/"/>
<check>
<inspection creation="2023-03-05T08:36:44.100Z" duration="246" execution="2023-03-05T08:36:44.145Z" id="checkMetadata" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Metadata file is present.</message>
</check>
<check>
<inspection creation="2023-03-05T08:36:45.387Z" duration="81" execution="2023-03-05T08:36:45.397Z" id="Solar_Angle_Zenith" item="S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Zenith solar angle (37.349671777297) is within the threshold limit (82.0) </message>
<extraValues>
<value name="EXPECTED">82.0</value>
<value name="SOLAR_ZENITH_ANGLE">37.349671777297</value>
</extraValues>
</check>
<check>
<inspection creation="2023-03-05T08:36:45.870Z" duration="23651" execution="2023-03-05T08:36:45.877Z" id="Data_Loss" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="FAILED"/>
<message contentType="text/plain">There is data loss in this tile</message>
<extraValues>
<value name="Affected_Bands">B01 B02 B03 B09 B10 B11 B12 B8A</value>
</extraValues>
</check>
</checkList>
</report>
</Data_Block>
</Earth_Explorer_File>
Loading

0 comments on commit 883f585

Please sign in to comment.