Skip to content

Commit

Permalink
Fix rechunk bug after spatial query (#861)
Browse files (browse the repository at this point in the history)
* fix rechunk bug after spatial query

* code cleanup
Loading branch information
LucaMarconato authored Feb 4, 2025
1 parent cab8353 commit 0e20de6
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ exclude = [
"docs/_build",
"dist",
"setup.py",

]
line-length = 120
target-version = "py310"
Expand Down
26 changes: 24 additions & 2 deletions src/spatialdata/_core/query/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,32 @@ def _process_data_tree_query_result(query_result: DataTree) -> DataTree | None:
d = {k: Dataset({"image": d[k]}) for k in scales_to_keep}
result = DataTree.from_dict(d)

# Rechunk the data to avoid irregular chunks
# rechunk the data to avoid irregular chunks
coords = list(result["scale0"].coords.keys())
result = result.chunk({c: "auto" for c in coords})

from dask.array.core import _check_regular_chunks

# check that the rechunking into regular chunks worked
chunks_still_irregular = False
for scale in result:
result[scale]["image"] = result[scale]["image"].chunk("auto")
data = result[scale]["image"].data
chunks_still_irregular = chunks_still_irregular or not _check_regular_chunks(data.chunks)

if chunks_still_irregular:
# reported here: https://github.com/scverse/spatialdata/issues/821#issuecomment-2632201695
# seemingly due to this bug: https://github.com/dask/dask/issues/11713
CHUNK_SIZE = 1024
rechunk_strategy = {c: CHUNK_SIZE for c in coords}
if "c" in coords:
rechunk_strategy["c"] = result["scale0"]["image"].chunks[0][0]
result = result.chunk(rechunk_strategy)

for scale in result:
data = result[scale]["image"].data
assert _check_regular_chunks(data.chunks), (
f"Chunks are not regular for the {scale} of the queried data: {data.chunks}. Please report this bug."
)
return result


Expand Down
4 changes: 2 additions & 2 deletions tests/io/test_pyramids_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,5 @@ def test_write_image_multiscale_performance(sdata_with_image: SpatialData, tmp_p

actual_num_chunk_writes = zarr_chunk_write_spy.call_count
actual_num_chunk_reads = zarr_chunk_read_spy.call_count
assert actual_num_chunk_writes == num_chunks_all_scales
assert actual_num_chunk_reads == num_chunks_scale0
assert actual_num_chunk_writes == num_chunks_all_scales.item()
assert actual_num_chunk_reads == num_chunks_scale0.item()

0 comments on commit 0e20de6

Please sign in to comment.