Skip to content

Commit

Permalink
Fix potential memory leaks by confining use of pyvips to subprocesses (
Browse files Browse the repository at this point in the history
…#117)

Calls to `libvips` that cause errors are suspected to cause memory leaks
in libvips. In order to prevent this, this PR confines calls to pyvips
to subprocesses spawned using `ProcessPoolExecutor`.

---------

Co-authored-by: James Meakin <[email protected]>
  • Loading branch information
pkcakeout and jmsmkn authored Jan 23, 2025
1 parent a4c14a8 commit d51d72b
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 110 deletions.
6 changes: 5 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# History

## 0.15.0 (2024-01-06)
## 0.15.1 (2025-01-23)

* Fix potential memory leaks by confining use of `pyvips` to subprocesses

## 0.15.0 (2025-01-06)

* Removed support for Python 3.9

Expand Down
75 changes: 22 additions & 53 deletions panimg/image_builders/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from collections import defaultdict
from collections.abc import Callable, Iterator
from concurrent.futures import ProcessPoolExecutor
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import TemporaryDirectory
Expand All @@ -17,17 +18,6 @@
from panimg.image_builders.dicom import get_dicom_headers_by_study
from panimg.models import MAXIMUM_SEGMENTS_LENGTH, ColorSpace, TIFFImage

try:
import openslide
except (OSError, ModuleNotFoundError):
openslide = False

try:
import pyvips
except OSError:
pyvips = False


DICOM_WSI_STORAGE_ID = "1.2.840.10008.5.1.4.1.1.77.1.6"


Expand Down Expand Up @@ -263,6 +253,8 @@ def _load_with_tiff(
def _load_with_openslide(
*, gc_file: GrandChallengeTiffFile
) -> GrandChallengeTiffFile:
import openslide

open_slide_file = openslide.open_slide(str(gc_file.path.absolute()))
gc_file = _extract_openslide_properties(
gc_file=gc_file, image=open_slide_file
Expand Down Expand Up @@ -320,7 +312,6 @@ def _convert(
*,
files: list[Path],
associated_files_getter: Callable[[Path], list[Path]] | None,
converter,
output_directory: Path,
file_errors: dict[Path, list[str]],
) -> list[GrandChallengeTiffFile]:
Expand All @@ -334,12 +325,13 @@ def _convert(
if associated_files_getter:
associated_files = associated_files_getter(gc_file.path)

tiff_file = _convert_to_tiff(
path=file,
pk=gc_file.pk,
converter=converter,
output_directory=output_directory,
)
with ProcessPoolExecutor(max_workers=1) as executor:
tiff_file = executor.submit(
_convert_to_tiff,
path=file,
pk=gc_file.pk,
output_directory=output_directory,
).result()
except Exception as e:
file_errors[file].append(
format_error(
Expand All @@ -356,30 +348,24 @@ def _convert(
return converted_files


def _convert_to_tiff(
*, path: Path, pk: UUID, converter, output_directory: Path
) -> Path:
def _convert_to_tiff(*, path: Path, pk: UUID, output_directory: Path) -> Path:
import pyvips

major = pyvips.base.version(0)
minor = pyvips.base.version(1)

if not (major > 8 or (major == 8 and minor >= 10)):
raise RuntimeError(
f"libvips {major}.{minor} is too old - requires >= 8.10"
)

new_file_name = output_directory / path.name / f"{pk}.tif"
new_file_name.parent.mkdir()

image = converter.Image.new_from_file(
image = pyvips.Image.new_from_file(
str(path.absolute()), access="sequential"
)

using_old_vips = (
pyvips.base.version(0) == 8 and pyvips.base.version(1) < 10
)
if (
using_old_vips
and image.get("xres") == 1
and "openslide.mpp-x" in image.get_fields()
):
# correct xres and yres if they have default value of 1
# due to a bug that is resolved in pyvips 8.10
x_res = 1000.0 / float(image.get("openslide.mpp-x"))
y_res = 1000.0 / float(image.get("openslide.mpp-y"))
image = image.copy(xres=x_res, yres=y_res)

image.write_to_file(
str(new_file_name.absolute()),
tile=True,
Expand Down Expand Up @@ -470,7 +456,6 @@ def associated_files(file_path: Path):
def _load_gc_files(
*,
files: set[Path],
converter,
output_directory: Path,
file_errors: DefaultDict[Path, list[str]],
) -> list[GrandChallengeTiffFile]:
Expand Down Expand Up @@ -503,7 +488,6 @@ def _load_gc_files(
converted_files = _convert(
files=complex_files,
associated_files_getter=handler,
converter=converter,
output_directory=output_directory,
file_errors=file_errors,
)
Expand All @@ -525,26 +509,11 @@ def _load_gc_files(
def image_builder_tiff( # noqa: C901
*, files: set[Path]
) -> Iterator[TIFFImage]:
if openslide is False:
raise ImportError(
f"Could not import openslide, which is required for the "
f"{__name__} image builder. Either ensure that libopenslide-dev "
f"is installed or remove {__name__} from your list of builders."
)

if pyvips is False:
raise ImportError(
f"Could not import pyvips, which is required for the "
f"{__name__} image builder. Either ensure that libvips-dev "
f"is installed or remove {__name__} from your list of builders."
)

file_errors: DefaultDict[Path, list[str]] = defaultdict(list)

with TemporaryDirectory() as output_directory:
loaded_files = _load_gc_files(
files=files,
converter=pyvips,
output_directory=Path(output_directory),
file_errors=file_errors,
)
Expand Down
21 changes: 7 additions & 14 deletions panimg/post_processors/tiff_to_dzi.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,22 @@
import logging
from concurrent.futures import ProcessPoolExecutor

from panimg.models import ImageType, PanImgFile, PostProcessorResult
from panimg.settings import DZI_TILE_SIZE

try:
import pyvips
except OSError:
pyvips = False

logger = logging.getLogger(__name__)


def tiff_to_dzi(*, image_files: set[PanImgFile]) -> PostProcessorResult:
if pyvips is False:
raise ImportError(
f"Could not import pyvips, which is required for the "
f"{__name__} post processor. Either ensure that libvips-dev "
f"is installed or remove {__name__} from your list of post "
f"processors."
)

new_image_files: set[PanImgFile] = set()

for file in image_files:
if file.image_type == ImageType.TIFF:
try:
result = _create_dzi_image(tiff_file=file)
with ProcessPoolExecutor(max_workers=1) as executor:
result = executor.submit(
_create_dzi_image, tiff_file=file
).result()
except Exception as e:
logger.warning(f"Could not create DZI for {file}: {e}")
continue
Expand All @@ -36,6 +27,8 @@ def tiff_to_dzi(*, image_files: set[PanImgFile]) -> PostProcessorResult:


def _create_dzi_image(*, tiff_file: PanImgFile) -> PostProcessorResult:
import pyvips

# Creates a dzi file and corresponding tiles in directory {pk}_files
dzi_output = tiff_file.file.parent / str(tiff_file.image_id)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "panimg"
version = "0.15.0"
version = "0.15.1"
description = "Conversion of medical images to MHA and TIFF."
license = "Apache-2.0"
authors = ["James Meakin <[email protected]>"]
Expand Down
132 changes: 91 additions & 41 deletions tests/test_tiff.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
import os
import shutil
from collections import defaultdict
from pathlib import Path
from unittest.mock import MagicMock, Mock
from uuid import uuid4

import pytest
import pyvips
import tifffile as tiff_lib
from pytest import approx
from tifffile import tifffile

from panimg.exceptions import ValidationError
from panimg.image_builders.tiff import (
GrandChallengeTiffFile,
_convert,
_extract_tags,
_get_color_space,
_get_mrxs_files,
_load_with_openslide,
_load_with_tiff,
image_builder_tiff,
Expand Down Expand Up @@ -302,41 +297,6 @@ def test_image_builder_tiff(tmpdir_factory):
assert len(image_builder_result.new_image_files) == 3


def test_handle_complex_files(tmpdir_factory):
# Copy resource files to writable temp directory
temp_dir = Path(tmpdir_factory.mktemp("temp") / "resources")
shutil.copytree(RESOURCE_PATH / "complex_tiff", temp_dir)
files = [Path(d[0]).joinpath(f) for d in os.walk(temp_dir) for f in d[2]]

# set up mock object to mock pyvips
properties = {
"xres": 1,
"yres": 1,
"openslide.mpp-x": 0.2525,
"openslide.mpp-y": 0.2525,
}

mock_converter = MagicMock(pyvips)
mock_image = mock_converter.Image.new_from_file.return_value
mock_image.get = Mock(return_value=1)
mock_image.get_fields = Mock(return_value=properties)

_convert(
files=files,
associated_files_getter=_get_mrxs_files,
converter=mock_converter,
output_directory=Path(tmpdir_factory.mktemp("output")),
file_errors=defaultdict(list),
)

if pyvips.base.version(0) == 8 and pyvips.base.version(1) < 10:
# work-around calculation of xres and yres in _convert_to_tiff function
mock_image.copy.assert_called()
assert "xres" in mock_image.copy.call_args[1]
else:
mock_image.copy.assert_not_called()


@pytest.mark.xfail(
reason="skip for now as we don't want to upload a large testset"
)
Expand Down Expand Up @@ -381,4 +341,94 @@ def test_error_handling(tmpdir_factory):
output_directory=Path(tmpdir_factory.mktemp("output")),
)

assert len(image_builder_result.file_errors) == 14
output = {k.name: v for k, v in image_builder_result.file_errors.items()}

assert output == {
"Mirax2-Fluorescence-1.mrxs": [
"TIFF image builder: Could not convert file to TIFF: "
"Mirax2-Fluorescence-1.mrxs, "
"error:unable to call VipsForeignLoadOpenslideFile\n "
"openslide2vips: opening slide: "
"Index.dat doesn't have expected version\n",
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05.vms": [
"TIFF image builder: Could not convert file to TIFF: "
"CMU-1-40x - 2010-01-12 13.24.05.vms, "
"error:unable to call VipsForeignLoadOpenslideFile\n "
"openslide2vips: opening slide: Can't validate JPEG 0: "
"No restart markers\n",
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Data0001.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Index.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05(0,1).jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05_map2.jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Slidedat.ini": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05_macro.jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05.opt": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05(1,0).jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Data0000.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05.jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05(1,1).jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Data0002.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
}

0 comments on commit d51d72b

Please sign in to comment.