Skip to content

Commit

Permalink
Cleanup + add strided patches (#218)
Browse files Browse the repository at this point in the history
* rm unused args, fix #185

* account for empty hierarchy or patches

fix #195

* please mypy

* make error message more helpful if patch dir does not exist

* rm unused args in wsi patches dataset

fixes #185

* check that there are patches in the loaded file

fixes #195

* ensure thumbnail is RGB

fixes #216

* appease mypy

* test that openslide and tiffslide pad regions

fixes #203

* rm unused import

* handle symlinked slides dirs

fixes #214

* add strided patches

fixes #202

This adds a command line option '--patch-overlap-ratio' that controls
the level of overlap between adjacent patches. Negative values create
space between patches, and values closer to 1 make patches overlap
more.

* save jsons with geojson ext

* print some logs

* use geojson ext for purple.json
  • Loading branch information
kaczmarj authored Feb 22, 2024
1 parent d52855a commit f39f92b
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 84 deletions.
73 changes: 60 additions & 13 deletions tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from wsinfer.wsi import HAS_OPENSLIDE
from wsinfer.wsi import HAS_TIFFSLIDE


@pytest.fixture
def tiff_image(tmp_path: Path) -> Path:
x = np.empty((4096, 4096, 3), dtype="uint8")
Expand Down Expand Up @@ -82,7 +83,7 @@ def test_cli_run_with_registered_models(
backend: str,
tiff_image: Path,
tmp_path: Path,
):
) -> None:
"""A regression test of the command 'wsinfer run'."""

reference_csv = Path(__file__).parent / "reference" / model / "purple.csv"
Expand Down Expand Up @@ -144,14 +145,14 @@ def test_cli_run_with_registered_models(
geojson_dir = results_dir / "model-outputs-geojson"
# result = runner.invoke(cli, ["togeojson", str(results_dir), str(geojson_dir)])
assert result.exit_code == 0
with open(geojson_dir / "purple.json") as f:
with open(geojson_dir / "purple.geojson") as f:
d: geojsonlib.GeoJSON = geojsonlib.load(f)
assert d.is_valid, "geojson not valid!"
assert len(d["features"]) == len(df_ref)

for geojson_row in d["features"]:
assert geojson_row["type"] == "Feature"
isinstance(geojson_row["id"] , str)
isinstance(geojson_row["id"], str)
assert geojson_row["geometry"]["type"] == "Polygon"
res = []
for i, prob_col in enumerate(prob_cols):
Expand All @@ -178,7 +179,7 @@ def test_cli_run_with_registered_models(
assert [df_coords] == geojson_row["geometry"]["coordinates"]


def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path):
def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path) -> None:
model = "breast-tumor-resnet34.tcga-brca"
reference_csv = Path(__file__).parent / "reference" / model / "purple.csv"
if not reference_csv.exists():
Expand Down Expand Up @@ -246,7 +247,7 @@ def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path):
), f"Column {prob_col} not allclose at atol=1e-07"


def test_cli_run_no_model_or_config(tmp_path: Path):
def test_cli_run_no_model_or_config(tmp_path: Path) -> None:
"""Test that --model or (--config and --model-path) is required."""
wsi_dir = tmp_path / "slides"
wsi_dir.mkdir()
Expand All @@ -265,7 +266,7 @@ def test_cli_run_no_model_or_config(tmp_path: Path):
assert "one of --model or (--config and --model-path) is required" in result.output


def test_cli_run_model_and_config(tmp_path: Path):
def test_cli_run_model_and_config(tmp_path: Path) -> None:
"""Test that (model and weights) or config is required."""
wsi_dir = tmp_path / "slides"
wsi_dir.mkdir()
Expand Down Expand Up @@ -298,7 +299,7 @@ def test_cli_run_model_and_config(tmp_path: Path):


@pytest.mark.xfail
def test_convert_to_sbu():
def test_convert_to_sbu() -> None:
# TODO: create a synthetic output and then convert it. Check that it is valid.
assert False

Expand Down Expand Up @@ -330,7 +331,7 @@ def test_patch_cli(
backend: str,
tmp_path: Path,
tiff_image: Path,
):
) -> None:
"""Test of 'wsinfer patch'."""
orig_slide_size = 4096
orig_slide_spacing = 0.25
Expand Down Expand Up @@ -380,7 +381,7 @@ def test_patch_cli(


# FIXME: parametrize this test across our models.
def test_jit_compile():
def test_jit_compile() -> None:
w = get_registered_model("breast-tumor-resnet34.tcga-brca")
model = get_pretrained_torch_module(w)

Expand Down Expand Up @@ -411,7 +412,7 @@ def test_jit_compile():
)


def test_issue_89():
def test_issue_89() -> None:
"""Do not fail if 'git' is not installed."""
model_obj = get_registered_model("breast-tumor-resnet34.tcga-brca")
d = _get_info_for_save(model_obj)
Expand All @@ -433,7 +434,7 @@ def test_issue_89():
os.environ["PATH"] = orig_path # reset path


def test_issue_94(tmp_path: Path, tiff_image: Path):
def test_issue_94(tmp_path: Path, tiff_image: Path) -> None:
"""Gracefully handle unreadable slides."""

# We have a valid tiff in 'tiff_image.parent'. We put in an unreadable file too.
Expand Down Expand Up @@ -461,7 +462,7 @@ def test_issue_94(tmp_path: Path, tiff_image: Path):
assert not results_dir.joinpath("model-outputs-csv").joinpath("bad.csv").exists()


def test_issue_97(tmp_path: Path, tiff_image: Path):
def test_issue_97(tmp_path: Path, tiff_image: Path) -> None:
"""Write a run_metadata file per run."""

runner = CliRunner()
Expand Down Expand Up @@ -502,11 +503,57 @@ def test_issue_97(tmp_path: Path, tiff_image: Path):
assert len(metas) == 2


def test_issue_125(tmp_path: Path):
def test_issue_125(tmp_path: Path) -> None:
"""Test that path in model config can be saved when a pathlib.Path object."""

w = get_registered_model("breast-tumor-resnet34.tcga-brca")
w.model_path = Path(w.model_path) # type: ignore
info = _get_info_for_save(w)
with open(tmp_path / "foo.json", "w") as f:
json.dump(info, f)


def test_issue_203(tiff_image: Path) -> None:
    """Check that tiffslide and openslide pad out-of-bounds region reads.

    For each backend, a 256x256 region is requested at level 0 with its
    origin set to the slide's reported dimensions. The returned image must
    have the requested size and contain only zeros.
    """
    import openslide
    import tiffslide

    # tiffslide backend
    with tiffslide.TiffSlide(tiff_image) as tiff_handle:
        width, height = tiff_handle.dimensions
        region = tiff_handle.read_region((width, height), level=0, size=(256, 256))
        assert region.size == (256, 256)
        assert np.allclose(np.array(region), 0)
    # Drop the references before the second backend opens the same file.
    del tiff_handle, region

    # openslide backend
    with openslide.OpenSlide(tiff_image) as openslide_handle:
        width, height = openslide_handle.dimensions
        region = openslide_handle.read_region(
            (width, height), level=0, size=(256, 256)
        )
        assert region.size == (256, 256)
        assert np.allclose(np.array(region), 0)


def test_issue_214(tmp_path: Path, tiff_image: Path) -> None:
    """Check that 'wsinfer run' works on a directory of symlinked slides.

    A symlink to the test slide is placed in its own directory, which is
    passed as --wsi-dir. The command must exit with code 0 and write the
    patch file and the CSV output under the symlink's name.
    """
    link_dir = tmp_path / "forlinks"
    link_dir.mkdir(parents=True)
    slide_link = link_dir / "arbitrary-link-name.tiff"
    slide_link.symlink_to(tiff_image)

    results_dir = tmp_path / "inference"
    cli_args = [
        "run",
        "--wsi-dir",
        str(link_dir),
        "--results-dir",
        str(results_dir),
        "--model",
        "breast-tumor-resnet34.tcga-brca",
    ]
    runner = CliRunner()
    result = runner.invoke(cli, cli_args)
    assert result.exit_code == 0

    # Output files are expected to carry the link's name.
    patches_file = results_dir / "patches" / slide_link.with_suffix(".h5").name
    csv_file = results_dir / "model-outputs-csv" / slide_link.with_suffix(".csv").name
    assert patches_file.exists()
    assert csv_file.exists()
10 changes: 3 additions & 7 deletions wsinfer/cli/convert_csv_to_sbubmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,21 +249,17 @@ def get_color(row: pd.Series) -> tuple[float, float, float]:
@click.command()
@click.argument(
"results_dir",
type=click.Path(
exists=True, file_okay=False, dir_okay=True, path_type=Path, resolve_path=True
),
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
)
@click.argument(
"output",
type=click.Path(exists=False, path_type=Path, resolve_path=True),
type=click.Path(exists=False, path_type=Path),
)
@click.option(
"--wsi-dir",
required=True,
help="Directory with whole slide images.",
type=click.Path(
exists=True, file_okay=False, dir_okay=True, path_type=Path, resolve_path=True
),
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
)
@click.option("--execution-id", required=True, help="Unique execution ID for this run.")
@click.option("--study-id", required=True, help="Study ID, like TCGA-BRCA.")
Expand Down
30 changes: 21 additions & 9 deletions wsinfer/cli/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,15 +188,15 @@ def get_stdout(args: list[str]) -> str:
@click.option(
"-i",
"--wsi-dir",
type=click.Path(exists=True, file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, file_okay=False, path_type=Path),
required=True,
help="Directory containing whole slide images. This directory can *only* contain"
" whole slide images.",
)
@click.option(
"-o",
"--results-dir",
type=click.Path(file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(file_okay=False, path_type=Path),
required=True,
help="Directory to store results. If directory exists, will skip"
" whole slides for which outputs exist.",
Expand All @@ -212,7 +212,7 @@ def get_stdout(args: list[str]) -> str:
@click.option(
"-c",
"--config",
type=click.Path(exists=True, dir_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, dir_okay=False, path_type=Path),
help=(
"Path to configuration for the trained model. Use this option if the"
" model weights are not registered in wsinfer. Mutually exclusive with"
Expand All @@ -222,7 +222,7 @@ def get_stdout(args: list[str]) -> str:
@click.option(
"-p",
"--model-path",
type=click.Path(exists=True, dir_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, dir_okay=False, path_type=Path),
help=(
"Path to the pretrained model. Use only when --config is passed. Mutually "
"exclusive with --model."
Expand Down Expand Up @@ -303,6 +303,16 @@ def get_stdout(args: list[str]) -> str:
" area, it is filled with foreground. The default is 190um x 190um. The units of"
" this argument are microns squared.",
)
@click.option(
"--patch-overlap-ratio",
default=0.0,
type=click.FloatRange(min=None, max=1, max_open=True),
help="The ratio of overlap among patches. The default value of 0 produces"
" non-overlapping patches. A value in (0, 1) will produce overlapping patches."
" Negative values will add space between patches. A value of -1 would skip"
" every other patch. A value of 0.5 will provide 50%% of overlap between patches."
" Values must be in (-inf, 1).",
)
def run(
ctx: click.Context,
*,
Expand All @@ -321,6 +331,7 @@ def run(
seg_closing_kernel_size: int,
seg_min_object_size_um2: float,
seg_min_hole_size_um2: float,
patch_overlap_ratio: float = 0.0,
) -> None:
"""Run model inference on a directory of whole slide images.
Expand Down Expand Up @@ -349,9 +360,6 @@ def run(
"--config and --model-path must both be set if one is set."
)

wsi_dir = wsi_dir.resolve()
results_dir = results_dir.resolve()

if not wsi_dir.exists():
raise FileNotFoundError(f"Whole slide image directory not found: {wsi_dir}")

Expand Down Expand Up @@ -401,6 +409,7 @@ def run(
closing_kernel_size=seg_closing_kernel_size,
min_object_size_um2=seg_min_object_size_um2,
min_hole_size_um2=seg_min_hole_size_um2,
overlap=patch_overlap_ratio,
)

if not results_dir.joinpath("patches").exists():
Expand Down Expand Up @@ -438,9 +447,12 @@ def run(
with open(run_metadata_outpath, "w") as f:
json.dump(run_metadata, f, indent=2)

click.secho("Finished.", fg="green")

click.echo("Writing inference results to GeoJSON files")
csvs = list((results_dir / "model-outputs-csv").glob("*.csv"))
write_geojsons(csvs, results_dir, num_workers)

if qupath:
click.echo("Creating QuPath project with results")
make_qupath_project(wsi_dir, results_dir)

click.secho("Finished.", fg="green")
40 changes: 20 additions & 20 deletions wsinfer/cli/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
@click.option(
"-i",
"--wsi-dir",
type=click.Path(exists=True, file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, file_okay=False, path_type=Path),
required=True,
help="Directory containing whole slide images. This directory can *only* contain"
" whole slide images.",
)
@click.option(
"-o",
"--results-dir",
type=click.Path(file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(file_okay=False, path_type=Path),
required=True,
help="Directory to store patch results. If directory exists, will skip"
" whole slides for which outputs exist.",
Expand All @@ -32,41 +32,41 @@
help="Physical spacing of the patch in micrometers per pixel.",
)
@click.option(
"--thumbsize",
"--seg-thumbsize",
default=(2048, 2048),
type=(int, int),
help="The size of the slide thumbnail (in pixels) used for tissue segmentation."
" The aspect ratio is preserved, and the longest side will have length"
" max(thumbsize).",
)
@click.option(
"--median-filter-size",
"--seg-median-filter-size",
default=7,
type=click.IntRange(min=3),
help="The kernel size for median filtering. Must be greater than 1 and odd.",
)
@click.option(
"--binary-threshold",
"--seg-binary-threshold",
default=7,
type=click.IntRange(min=1),
help="The threshold for image binarization.",
)
@click.option(
"--closing-kernel-size",
"--seg-closing-kernel-size",
default=6,
type=click.IntRange(min=1),
help="The kernel size for binary closing (morphological operation).",
)
@click.option(
"--min-object-size-um2",
"--seg-min-object-size-um2",
default=200**2,
type=click.FloatRange(min=0),
help="The minimum size of an object to keep during tissue detection. If a"
" contiguous object is smaller than this area, it replaced with background."
" The default is 200um x 200um. The units of this argument are microns squared.",
)
@click.option(
"--min-hole-size-um2",
"--seg-min-hole-size-um2",
default=190**2,
type=click.FloatRange(min=0),
help="The minimum size of a hole to keep as a hole. If a hole is smaller than this"
Expand All @@ -78,23 +78,23 @@ def patch(
results_dir: str,
patch_size_px: int,
patch_spacing_um_px: float,
thumbsize: tuple[int, int],
median_filter_size: int,
binary_threshold: int,
closing_kernel_size: int,
min_object_size_um2: float,
min_hole_size_um2: float,
seg_thumbsize: tuple[int, int],
seg_median_filter_size: int,
seg_binary_threshold: int,
seg_closing_kernel_size: int,
seg_min_object_size_um2: float,
seg_min_hole_size_um2: float,
) -> None:
"""Patch a directory of whole slide iamges."""
segment_and_patch_directory_of_slides(
wsi_dir=wsi_dir,
save_dir=results_dir,
patch_size_px=patch_size_px,
patch_spacing_um_px=patch_spacing_um_px,
thumbsize=thumbsize,
median_filter_size=median_filter_size,
binary_threshold=binary_threshold,
closing_kernel_size=closing_kernel_size,
min_object_size_um2=min_object_size_um2,
min_hole_size_um2=min_hole_size_um2,
thumbsize=seg_thumbsize,
median_filter_size=seg_median_filter_size,
binary_threshold=seg_binary_threshold,
closing_kernel_size=seg_closing_kernel_size,
min_object_size_um2=seg_min_object_size_um2,
min_hole_size_um2=seg_min_hole_size_um2,
)
Loading

0 comments on commit f39f92b

Please sign in to comment.