Skip to content

Commit

Permalink
Allow setting of size limit and directory (#76)
Browse files Browse the repository at this point in the history
* Allow setting of size limit and directory

* Fix flake8 by using SIZE_LIMIT

* Replace allow_caching with backend_config

Rather than a single boolean, pass a dictionary of options
through the resolve and infer methods to allow for more
extensible mechanisms. The `cache` key replaces most current
uses of the variable.

see also: spacetx/starfish#789

* Support cache.debug property (uses print)

This would be better implemented with the logging module but
that requires more discussion and setup.

* Require cache.directory for caching

* Update test_write.py

* Update backend_config keys

* Fix Reader.parse_doc propagation

* Fix cache size param

* Disable caching for size_limit=0

* Only accept dicts as backend_config

* Update test to use size_limit=0
joshmoore authored and ttung committed Dec 14, 2018
1 parent b95498d commit 2836198
Showing 4 changed files with 45 additions and 23 deletions.
3 changes: 2 additions & 1 deletion slicedimage/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from ._base import ChecksumValidationError
from ._caching import CachingBackend
from ._caching import CachingBackend, SIZE_LIMIT
from ._disk import DiskBackend
from ._http import HttpBackend

@@ -11,4 +11,5 @@
ChecksumValidationError,
DiskBackend,
HttpBackend,
SIZE_LIMIT,
]
4 changes: 2 additions & 2 deletions slicedimage/backends/_caching.py
Original file line number Diff line number Diff line change
@@ -15,10 +15,10 @@ class CachingBackend(Backend):
_LOCK = Lock()
_CACHE = {}

def __init__(self, cacheroot, authoritative_backend):
def __init__(self, cacheroot, authoritative_backend, size_limit=SIZE_LIMIT):
with CachingBackend._LOCK:
if cacheroot not in CachingBackend._CACHE:
CachingBackend._CACHE[cacheroot] = Cache(cacheroot, size=int(SIZE_LIMIT))
CachingBackend._CACHE[cacheroot] = Cache(cacheroot, size_limit=int(size_limit))
self._cache = CachingBackend._CACHE[cacheroot]
self._authoritative_backend = authoritative_backend

59 changes: 40 additions & 19 deletions slicedimage/io.py
Original file line number Diff line number Diff line change
@@ -11,24 +11,45 @@
from six.moves import urllib

from slicedimage.urlpath import pathjoin, pathsplit
from .backends import CachingBackend, DiskBackend, HttpBackend
from .backends import CachingBackend, DiskBackend, HttpBackend, SIZE_LIMIT
from ._collection import Collection
from ._formats import ImageFormat
from ._tile import Tile
from ._tileset import TileSet


def infer_backend(baseurl, allow_caching=True):
def infer_backend(baseurl, backend_config=None):
"""
Guess the backend based on the format of `baseurl`, the consistent part of the URL or file path.
The backend_config dictionary can contain flexible parameters for the different backends.
Caching parameter keys include:
- ["caching"]["directory"] (default: None which disables caching)
- ["caching"]["debug"] (default: False)
- ["caching"]["size_limit"] (default: SIZE_LIMIT)
"""

parsed = urllib.parse.urlparse(baseurl)

if parsed.scheme in ("http", "https"):
backend = HttpBackend(baseurl)
if allow_caching:
backend = CachingBackend(os.path.expanduser("~/.starfish-cache"), backend)

cache_config = {}
if isinstance(backend_config, dict):
cache_config = backend_config.get("caching", cache_config)

cache_dir = cache_config.get("directory", None)
if cache_dir is not None:
cache_dir = os.path.expanduser(cache_dir)
size_limit = cache_config.get("size_limit", SIZE_LIMIT)
if size_limit > 0:
debug = cache_config.get("debug", False)
if debug:
print("> caching {} to {} (size_limit: {})".format(
baseurl, cache_dir, size_limit))
backend = CachingBackend(cache_dir, backend, size_limit)
elif parsed.scheme == "file":
backend = DiskBackend(parsed.path)
else:
@@ -38,36 +59,36 @@ def infer_backend(baseurl, allow_caching=True):
return backend


def resolve_path_or_url(path_or_url, allow_caching=True):
def resolve_path_or_url(path_or_url, backend_config=None):
"""
Given either a path (absolute or relative), or a URL, attempt to resolve it. Returns a tuple
consisting of: a :py:class:`slicedimage.backends._base.Backend`, the basename of the object, and
the baseurl of the object.
"""
try:
return resolve_url(path_or_url, allow_caching=allow_caching)
return resolve_url(path_or_url, backend_config=None)
except ValueError:
if os.path.isfile(path_or_url):
return resolve_url(
os.path.basename(path_or_url),
baseurl="file://{}".format(os.path.dirname(os.path.abspath(path_or_url))),
allow_caching=allow_caching,
backend_config=backend_config
)
raise


def _resolve_absolute_url(absolute_url, allow_caching):
def _resolve_absolute_url(absolute_url, backend_config):
"""
Given a string that is an absolute URL, return a tuple consisting of: a
:py:class:`slicedimage.backends._base.Backend`, the basename of the object, and the baseurl of
the object.
"""
splitted = pathsplit(absolute_url)
backend = infer_backend(splitted[0], allow_caching)
backend = infer_backend(splitted[0], backend_config)
return backend, splitted[1], splitted[0]


def resolve_url(name_or_url, baseurl=None, allow_caching=True):
def resolve_url(name_or_url, baseurl=None, backend_config=None):
"""
Given a string that can either be a name or a fully qualified url, return a tuple consisting of:
a :py:class:`slicedimage.backends._base.Backend`, the basename of the object, and the baseurl of
@@ -78,19 +99,19 @@ def resolve_url(name_or_url, baseurl=None, allow_caching=True):
"""
try:
# assume it's a fully qualified url.
return _resolve_absolute_url(name_or_url, allow_caching)
return _resolve_absolute_url(name_or_url, backend_config)
except ValueError:
if baseurl is None:
# oh, we have no baseurl. punt.
raise
absolute_url = pathjoin(baseurl, name_or_url)
return _resolve_absolute_url(absolute_url, allow_caching)
return _resolve_absolute_url(absolute_url, backend_config)


class Reader(object):
@staticmethod
def parse_doc(name_or_url, baseurl, allow_caching=True):
backend, name, baseurl = resolve_url(name_or_url, baseurl, allow_caching)
def parse_doc(name_or_url, baseurl, backend_config=None):
backend, name, baseurl = resolve_url(name_or_url, baseurl, backend_config)
with backend.read_contextmanager(name) as fh:
reader = codecs.getreader("utf-8")
json_doc = json.load(reader(fh))
@@ -105,9 +126,9 @@ def parse_doc(name_or_url, baseurl, allow_caching=True):
raise KeyError(
"JSON document missing `version` field. Please specify the file format version.")

return parser.parse(json_doc, baseurl, allow_caching)
return parser.parse(json_doc, baseurl, backend_config)

def parse(self, json_doc, baseurl, allow_caching):
def parse(self, json_doc, baseurl, backend_config):
raise NotImplementedError()


@@ -149,12 +170,12 @@ class v0_0_0(object):
VERSION = "0.0.0"

class Reader(Reader):
def parse(self, json_doc, baseurl, allow_caching):
def parse(self, json_doc, baseurl, backend_config):
if CollectionKeys.CONTENTS in json_doc:
# this is a Collection
result = Collection(json_doc.get(CommonPartitionKeys.EXTRAS, None))
for name, relative_path_or_url in json_doc[CollectionKeys.CONTENTS].items():
collection = Reader.parse_doc(relative_path_or_url, baseurl)
collection = Reader.parse_doc(relative_path_or_url, baseurl, backend_config)
collection._name_or_url = relative_path_or_url
result.add_partition(name, collection)
elif TileSetKeys.TILES in json_doc:
@@ -171,7 +192,7 @@ def parse(self, json_doc, baseurl, allow_caching):

for tile_doc in json_doc[TileSetKeys.TILES]:
relative_path_or_url = tile_doc[TileKeys.FILE]
backend, name, _ = resolve_url(relative_path_or_url, baseurl, allow_caching)
backend, name, _ = resolve_url(relative_path_or_url, baseurl, backend_config)

tile_format_str = tile_doc.get(TileKeys.TILE_FORMAT, None)
if tile_format_str:
2 changes: 1 addition & 1 deletion tests/io/v0_0_0/test_write.py
Original file line number Diff line number Diff line change
@@ -162,7 +162,7 @@ def test_checksum_on_write(self):
image = slicedimage.Reader.parse_doc(
"tileset.json",
"file://{}".format(tempdir),
allow_caching=False,
{"cache": {"size_limit": 0}}, # disabled
)

with TemporaryDirectory() as output_tempdir, \

0 comments on commit 2836198

Please sign in to comment.