-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add script to apply ESA quality mask (#15)
* Add CLI to apply per-band QA mask for lost or degraded pixel values * Update list of spectral bands to subset used by HLS * Add unit tests and fix JPEG2000 compression on update * Update README with apply_s2_quality_mask usage * Check the GENERAL_QUALITY metadata to see if we can exit early * granule_dir is actually a str in click==7.x
- Loading branch information
Showing
8 changed files
with
488 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
from typing import List, Optional, Tuple, TypeVar | ||
from pathlib import Path | ||
|
||
import click | ||
import rasterio | ||
from lxml import etree | ||
|
||
|
||
# MSI band number definitions for bands used by HLS v2 product | ||
# (coastal, blue/green/red, NIR, SWIR1, SWIR2) | ||
SPECTRAL_BANDS = frozenset( | ||
{ | ||
"B01", | ||
"B02", | ||
"B03", | ||
"B04", | ||
"B8A", | ||
"B11", | ||
"B12", | ||
} | ||
) | ||
|
||
# ensure compression is lossless to avoid changing values, | ||
# https://gdal.org/en/stable/drivers/raster/jp2openjpeg.html#lossless-compression | ||
JPEG2000_NO_COMPRESSION_OPTIONS = { | ||
"QUALITY": "100", | ||
"REVERSIBLE": "YES", | ||
"YCBCR420": "NO", | ||
} | ||
|
||
|
||
T = TypeVar("T") | ||
|
||
|
||
def _one_or_none(seq: List[T]) -> Optional[T]: | ||
if len(seq) == 1: | ||
return seq[0] | ||
|
||
|
||
def find_affected_bands(granule_dir: Path) -> List[str]: | ||
"""Check the granule GENERAL_QUALITY.xml for affected bands, if any""" | ||
report_path = _one_or_none(list(granule_dir.glob("**/QI_DATA/GENERAL_QUALITY.xml"))) | ||
|
||
# We couldn't find the report, so check all quality masks | ||
if report_path is None: | ||
return list(SPECTRAL_BANDS) | ||
|
||
quality_report = etree.parse(str(report_path)) | ||
root = quality_report.getroot() | ||
# give default namespace a name for use in xpath | ||
nsmap = {"qa": root.nsmap[None]} | ||
|
||
# We have a XML structure like, | ||
# <check> | ||
# <inspection ... id="Data_Loss" status="FAILED"> | ||
# <message>There is data loss in this tile</message> | ||
# <extraValues> | ||
# <value name="Affected_Bands">B01 B02</value> | ||
# </extraValues> | ||
# </check> | ||
# | ||
# We want to grab the text from the "Affected_Bands" when the message | ||
# indicates there is data loss in the tile. | ||
data_loss_bands_txt = _one_or_none( | ||
root.xpath( | ||
( | ||
".//qa:check[qa:message/text() = 'There is data loss in this tile']/" | ||
"qa:extraValues/qa:value[@name = 'Affected_Bands']/text()" | ||
), | ||
namespaces=nsmap, | ||
) | ||
) | ||
if data_loss_bands_txt is not None: | ||
return data_loss_bands_txt.split(" ") | ||
return [] | ||
|
||
|
||
def find_image_mask_pairs( | ||
granule_dir: Path, bands: List[str] | ||
) -> List[Tuple[Path, Path]]: | ||
"""Search granule directory for image + mask pairs | ||
The quality masks were produced in an imagery format since baseline 04.00 | ||
(~Jan 25, 2022 and onward), so this function might return nothing | ||
if run on granules created with older baselines. These older baselines | ||
encoded the mask quality information in the GML (vector) format. | ||
The relevant parts of an unzipped granule looks like, | ||
``` | ||
${GRANULE_ID}.SAFE | ||
${GRANULE_ID}.SAFE/GRANULE | ||
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115 | ||
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/IMG_DATA | ||
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/IMG_DATA/T45TXF_20221121T050121_B01.jp2 | ||
... (*B*.jp2) | ||
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/QI_DATA | ||
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/QI_DATA/MSK_QUALIT_B01.jp2 | ||
... (MSK_QUALIT_B*.jp2) | ||
``` | ||
References | ||
---------- | ||
https://sentinels.copernicus.eu/web/sentinel/-/copernicus-sentinel-2-major-products-upgrade-upcoming | ||
""" | ||
pairs = [] | ||
for band in bands: | ||
image = _one_or_none(list(granule_dir.glob(f"**/IMG_DATA/*{band}.jp2"))) | ||
mask = _one_or_none(list(granule_dir.glob(f"**/QI_DATA/MSK_QUALIT_{band}.jp2"))) | ||
if image and mask: | ||
pairs.append((image, mask)) | ||
|
||
# Find all bands, or none | ||
if len(pairs) == len(SPECTRAL_BANDS): | ||
return pairs | ||
return [] | ||
|
||
|
||
def apply_quality_mask(image: Path, mask: Path): | ||
"""Apply Sentinel-2 image quality mask | ||
Each spectral band (`IMG_DATA/B*.jp2`) has a corresponding quality mask | ||
image (`QI_DATA/MSK_QUALIT_B*.jp2`) of the same spatial resolution. The mask | ||
image has 8 bands, with each band encoding a boolean for the presence/absence | ||
of a quality issue. The bands indicate, | ||
1: lost ancillary packets | ||
2: degraded ancillary packets | ||
3: lost MSI packets | ||
4: degraded MSI packets | ||
5: defective pixels | ||
6: no data | ||
7: partially corrected cross talk | ||
8: saturated pixels | ||
We have decided to mask 3 (lost MSI packets) and 4 (degraded MSI packets) only. | ||
So far, 5 and 7 are present in B10-B11, because interpolation is used to fill 5 | ||
and the cross talk is partially corrected. | ||
Saturated pixels are still useful for the magnitude of the reflectance; keep it. | ||
We apply the mask by updating the spectral data images in-place rather than | ||
creating another file. | ||
References | ||
---------- | ||
https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/data-quality-reports | ||
""" | ||
with rasterio.open(mask) as mask_src: | ||
qa_bands = mask_src.read(indexes=[3, 4]) | ||
lost_degraded_mask = (qa_bands == 1).any(axis=0) | ||
|
||
# only update imagery if mask shows it has any bad pixels | ||
if lost_degraded_mask.any(): | ||
click.echo(f"Masking lost or degraded pixel values in {image}") | ||
with rasterio.open(image, "r+", **JPEG2000_NO_COMPRESSION_OPTIONS) as img_src: | ||
img = img_src.read(1) | ||
# L1C images don't define the nodata value on file so we can't update the | ||
# mask (e.g., via `write_mask`) but 0 is used as the nodata value | ||
# (see SPECIAL_VALUE_INDEX for NODATA in metadata) | ||
img[lost_degraded_mask] = 0 | ||
img_src.write(img, 1) | ||
|
||
|
||
@click.command() | ||
@click.argument( | ||
"granule_dir", | ||
type=click.Path(file_okay=False, dir_okay=True, exists=True), | ||
) | ||
@click.pass_context | ||
def main(ctx, granule_dir: str): | ||
"""Update Sentinel-2 imagery by masking lost or degraded pixels""" | ||
granule_dir = Path(granule_dir) | ||
|
||
affected_bands = find_affected_bands(granule_dir) | ||
affected_hls_bands = SPECTRAL_BANDS.intersection(affected_bands) | ||
if not affected_hls_bands: | ||
click.echo(f"No bands are affected by data loss in {granule_dir}") | ||
ctx.exit() | ||
|
||
click.echo(f"Applying Sentinel-2 QAQC mask to granule_dir={granule_dir}") | ||
image_mask_pairs = find_image_mask_pairs(granule_dir, affected_hls_bands) | ||
for (image, mask) in image_mask_pairs: | ||
apply_quality_mask(image, mask) | ||
|
||
click.echo("Complete") | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,14 +8,17 @@ | |
"click~=7.1.0", | ||
"lxml", | ||
"boto3~=1.17.91", | ||
"espa-python-library @ git+https://github.com/USGS-EROS/[email protected]#egg=espa-python-library" | ||
"espa-python-library @ git+https://github.com/USGS-EROS/[email protected]#egg=espa-python-library", | ||
"rasterio~=1.2", | ||
"numpy~=1.16", | ||
], | ||
include_package_data=True, | ||
extras_require={ | ||
"dev": ["flake8", "black"], | ||
"test": ["flake8", "pytest", "Jinja2==2.10.1", "moto[s3]~=2.0.8"] | ||
}, | ||
entry_points={"console_scripts": [ | ||
"apply_s2_quality_mask=apply_s2_quality_mask.apply_s2_quality_mask:main", | ||
"parse_fmask=parse_fmask.parse_fmask:main", | ||
"check_solar_zenith_sentinel=check_solar_zenith_sentinel.check_solar_zenith_sentinel:main", | ||
"check_solar_zenith_landsat=check_solar_zenith_landsat.check_solar_zenith_landsat:main", | ||
|
49 changes: 49 additions & 0 deletions
49
tests/data/quality-mask/L1C_T12QXM_A048143_20240909T175112/QI_DATA/GENERAL_QUALITY.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport"> | ||
<Earth_Explorer_Header> | ||
<Fixed_Header> | ||
<File_Name>GRANULE/L1C_T12QXM_A048143_20240909T175112/QI_DATA/GENERAL_QUALITY.xml</File_Name> | ||
<File_Description>Report produced by OLQC-SC</File_Description> | ||
<Notes/> | ||
<Mission>S2A</Mission> | ||
<File_Class>OPER</File_Class> | ||
<File_Type>REP_OLQCPA</File_Type> | ||
<Validity_Period> | ||
<Validity_Start>UTC=2024-07-23T03:00:00</Validity_Start> | ||
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop> | ||
</Validity_Period> | ||
<File_Version>2</File_Version> | ||
<Source> | ||
<System>OLQC-SC</System> | ||
<Creator>OLQC-SC</Creator> | ||
<Creator_Version>06.03.00</Creator_Version> | ||
<Creation_Date>UTC=2024-09-10T00:44:41</Creation_Date> | ||
</Source> | ||
</Fixed_Header> | ||
</Earth_Explorer_Header> | ||
<Data_Block type="xml"> | ||
<report date="2024-09-10T00:44:41.058Z" gippVersion="1.0" globalStatus="PASSED"> | ||
<checkList> | ||
<parentID>S2A_OPER_GIP_OLQCPA_MPC__20240717T000043_V20240723T030000</parentID> | ||
<name>GENERAL_QUALITY</name> | ||
<version>1.0</version> | ||
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" url="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/"/> | ||
<check> | ||
<inspection creation="2024-09-10T00:44:40.633Z" duration="357" execution="2024-09-10T00:44:40.672Z" id="Solar_Angle_Zenith" item="S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/> | ||
<message contentType="text/plain">Zenith solar angle (26.6116855171579) is within the threshold limit (82.0) </message> | ||
<extraValues> | ||
<value name="EXPECTED">82.0</value> | ||
<value name="SOLAR_ZENITH_ANGLE">26.6116855171579</value> | ||
</extraValues> | ||
</check> | ||
<check> | ||
<inspection creation="2024-09-10T00:44:41.909Z" duration="16" execution="2024-09-10T00:44:41.914Z" id="checkMetadata" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/> | ||
<message contentType="text/plain">Metadata file is present.</message> | ||
</check> | ||
<check> | ||
<inspection creation="2024-09-10T00:44:43.339Z" duration="11692" execution="2024-09-10T00:44:43.350Z" id="Data_Loss" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="PASSED"/> | ||
<message contentType="text/plain">No data loss in this tile.</message> | ||
</check> | ||
</checkList> | ||
</report> | ||
</Data_Block> | ||
</Earth_Explorer_File> |
52 changes: 52 additions & 0 deletions
52
tests/data/quality-mask/L1C_T43QDG_A031305_20230305T053523/QI_DATA/GENERAL_QUALITY.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport"> | ||
<Earth_Explorer_Header> | ||
<Fixed_Header> | ||
<File_Name>GRANULE/L1C_T43QDG_A031305_20230305T053523/QI_DATA/GENERAL_QUALITY.xml</File_Name> | ||
<File_Description>Report produced by OLQC-SC</File_Description> | ||
<Notes/> | ||
<Mission>S2B</Mission> | ||
<File_Class>OPER</File_Class> | ||
<File_Type>REP_OLQCPA</File_Type> | ||
<Validity_Period> | ||
<Validity_Start>UTC=2022-08-30T00:25:00</Validity_Start> | ||
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop> | ||
</Validity_Period> | ||
<File_Version>2</File_Version> | ||
<Source> | ||
<System>OLQC-SC</System> | ||
<Creator>OLQC-SC</Creator> | ||
<Creator_Version>06.01.00</Creator_Version> | ||
<Creation_Date>UTC=2023-03-05T08:36:44</Creation_Date> | ||
</Source> | ||
</Fixed_Header> | ||
</Earth_Explorer_Header> | ||
<Data_Block type="xml"> | ||
<report date="2023-03-05T08:36:44.399Z" gippVersion="1.0" globalStatus="FAILED"> | ||
<checkList> | ||
<parentID>S2B_OPER_GIP_OLQCPA_MPC__20220715T000042_V20220830T002500</parentID> | ||
<name>GENERAL_QUALITY</name> | ||
<version>1.0</version> | ||
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" url="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/"/> | ||
<check> | ||
<inspection creation="2023-03-05T08:36:44.100Z" duration="246" execution="2023-03-05T08:36:44.145Z" id="checkMetadata" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/> | ||
<message contentType="text/plain">Metadata file is present.</message> | ||
</check> | ||
<check> | ||
<inspection creation="2023-03-05T08:36:45.387Z" duration="81" execution="2023-03-05T08:36:45.397Z" id="Solar_Angle_Zenith" item="S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/> | ||
<message contentType="text/plain">Zenith solar angle (37.349671777297) is within the threshold limit (82.0) </message> | ||
<extraValues> | ||
<value name="EXPECTED">82.0</value> | ||
<value name="SOLAR_ZENITH_ANGLE">37.349671777297</value> | ||
</extraValues> | ||
</check> | ||
<check> | ||
<inspection creation="2023-03-05T08:36:45.870Z" duration="23651" execution="2023-03-05T08:36:45.877Z" id="Data_Loss" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="FAILED"/> | ||
<message contentType="text/plain">There is data loss in this tile</message> | ||
<extraValues> | ||
<value name="Affected_Bands">B01 B02 B03 B09 B10 B11 B12 B8A</value> | ||
</extraValues> | ||
</check> | ||
</checkList> | ||
</report> | ||
</Data_Block> | ||
</Earth_Explorer_File> |
Oops, something went wrong.