diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks index 2fa87d5a..eac4eaf0 160000 --- a/docs/tutorials/notebooks +++ b/docs/tutorials/notebooks @@ -1 +1 @@ -Subproject commit 2fa87d5a629252dd8b85430ed9d2f425a8b062ed +Subproject commit eac4eaf0c2527887288aa55ce48238aff58f22a5 diff --git a/pyproject.toml b/pyproject.toml index c95c2571..3277be97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,7 +127,7 @@ exclude = [ "docs/_build", "dist", "setup.py", - + ] line-length = 120 target-version = "py310" diff --git a/src/spatialdata/_core/query/_utils.py b/src/spatialdata/_core/query/_utils.py index 0229d8bd..cb2c57b1 100644 --- a/src/spatialdata/_core/query/_utils.py +++ b/src/spatialdata/_core/query/_utils.py @@ -140,10 +140,32 @@ def _process_data_tree_query_result(query_result: DataTree) -> DataTree | None: d = {k: Dataset({"image": d[k]}) for k in scales_to_keep} result = DataTree.from_dict(d) - # Rechunk the data to avoid irregular chunks + # rechunk the data to avoid irregular chunks + coords = list(result["scale0"].coords.keys()) + result = result.chunk({c: "auto" for c in coords}) + + from dask.array.core import _check_regular_chunks + + # check that the rechunking into regular chunks worked + chunks_still_irregular = False for scale in result: - result[scale]["image"] = result[scale]["image"].chunk("auto") + data = result[scale]["image"].data + chunks_still_irregular = chunks_still_irregular or not _check_regular_chunks(data.chunks) + + if chunks_still_irregular: + # reported here: https://github.com/scverse/spatialdata/issues/821#issuecomment-2632201695 + # seemingly due to this bug: https://github.com/dask/dask/issues/11713 + CHUNK_SIZE = 1024 + rechunk_strategy = {c: CHUNK_SIZE for c in coords} + if "c" in coords: + rechunk_strategy["c"] = result["scale0"]["image"].chunks[0][0] + result = result.chunk(rechunk_strategy) + for scale in result: + data = result[scale]["image"].data + assert _check_regular_chunks(data.chunks), ( + f"Chunks are not regular for the {scale} of the queried data: {data.chunks}. Please report this bug." + ) return result diff --git a/tests/io/test_pyramids_performance.py b/tests/io/test_pyramids_performance.py index bd9d1a8f..f0ca31a2 100644 --- a/tests/io/test_pyramids_performance.py +++ b/tests/io/test_pyramids_performance.py @@ -82,5 +82,5 @@ def test_write_image_multiscale_performance(sdata_with_image: SpatialData, tmp_p actual_num_chunk_writes = zarr_chunk_write_spy.call_count actual_num_chunk_reads = zarr_chunk_read_spy.call_count - assert actual_num_chunk_writes == num_chunks_all_scales - assert actual_num_chunk_reads == num_chunks_scale0 + assert actual_num_chunk_writes == num_chunks_all_scales.item() + assert actual_num_chunk_reads == num_chunks_scale0.item()