Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for processing of floodscan polygon metadata #25

Merged
merged 1 commit into from
Nov 28, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions src/utils/metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,18 @@
import numpy as np
import pandas as pd
import rioxarray as rxr
from rioxarray.exceptions import NoDataInBounds

from src.config.settings import LOG_LEVEL, load_pipeline_config
from src.utils.cloud_utils import get_container_client
from src.utils.cog_utils import get_cog_url
from src.utils.database_utils import create_polygon_table, postgres_upsert
from src.utils.iso3_utils import get_iso3_data, load_shp_from_azure
from src.utils.raster_utils import fast_zonal_stats, prep_raster, rasterize_admin
from src.utils.raster_utils import (
fast_zonal_stats,
prep_raster,
rasterize_admin,
)

logger = logging.getLogger(__name__)
coloredlogs.install(level=LOG_LEVEL, logger=logger)
Expand Down Expand Up @@ -57,12 +62,16 @@ def get_single_cog(dataset, mode):
container_client = get_container_client(mode, "raster")
config = load_pipeline_config(dataset)
prefix = config["blob_prefix"]
cogs_list = [x.name for x in container_client.list_blobs(name_starts_with=prefix)]
cogs_list = [
x.name for x in container_client.list_blobs(name_starts_with=prefix)
]
cog_url = get_cog_url(mode, cogs_list[0])
return rxr.open_rasterio(cog_url, chunks="auto")


def process_polygon_metadata(engine, mode, upsampled_resolution, sel_iso3s=None):
def process_polygon_metadata(
engine, mode, upsampled_resolution, sel_iso3s=None
):
"""
Process and store polygon metadata for all administrative levels and datasets.

Expand Down Expand Up @@ -97,28 +106,46 @@ def process_polygon_metadata(engine, mode, upsampled_resolution, sel_iso3s=None)
for dataset in datasets:
da = get_single_cog(dataset, mode)
input_resolution = da.rio.resolution()
gdf_adm0 = gpd.read_file(f"{td}/{iso3.lower()}_adm0.shp")
gdf_adm0 = gpd.read_file(
f"{td}/{iso3.lower()}_adm0.shp"
)
# We want all values to be unique, so that we can count the total
# number of unique cells from the raw source that contribute to the stats
da.values = np.arange(da.size).reshape(da.shape)
da = da.astype(np.float32)
# Dummy `date` dimension to pass `validate_dims`
da = da.expand_dims({"date": 1})

try:
da_clipped = prep_raster(da, gdf_adm0)
except NoDataInBounds:
logger.error(
f"{dataset} has no coverage at adm level {i}"
)
continue

da_clipped = prep_raster(da, gdf_adm0, logger=logger)
output_resolution = da_clipped.rio.resolution()
upscale_factor = input_resolution[0] / output_resolution[0]
upscale_factor = (
input_resolution[0] / output_resolution[0]
)

src_transform = da_clipped.rio.transform()
src_width = da_clipped.rio.width
src_height = da_clipped.rio.height

admin_raster = rasterize_admin(
gdf, src_width, src_height, src_transform, all_touched=False
gdf,
src_width,
src_height,
src_transform,
all_touched=False,
)
adm_ids = gdf[f"ADM{i}_PCODE"]
n_adms = len(adm_ids)

results = fast_zonal_stats(
da_clipped.values[0],
da_clipped.values[0][0],
admin_raster,
n_adms,
stats=["count", "unique"],
Expand Down