Skip to content

Commit

Permalink
Add version 1.0.0 of the file format. (#81)
Browse files Browse the repository at this point in the history
Internally, shapes are tracked as dicts mapping the dimension name to the size.  We maintain compatibility to read and write v0_0_0 files, however.

This is part of the resolution for spacetx/starfish#528.
  • Loading branch information
ttung authored Mar 11, 2019
1 parent 6cf6280 commit a61ab25
Show file tree
Hide file tree
Showing 20 changed files with 1,332 additions and 32 deletions.
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dimensions list Yes Names of the dimensions. Dimensions must
tiles dict Yes See Tiles_
shape dict Yes Maps each non-geometric dimension to the possible number of values for that
dimension for the tiles in this `Tile Set`_.
default_tile_shape tuple No Default pixel dimensions of a tile, ordered as y, x.
default_tile_shape dict No Mapping from the pixel dimensions to their sizes.
default_tile_format string No Default file format of the tiles.
zoom dict No See Zoom_
extras dict No Additional application-specific payload. The vocabulary and the schema are
Expand All @@ -89,8 +89,8 @@ indices dict Yes Maps each of the dimensions *not* in geometric s
of the dimensions here must be specified in the `Tile Set`_. The values of the indices
must be non-negative integers, and every value up to but not including the maximum
specified in the `shape` field of the `Tile Set`_ must be represented.
tile_shape tuple No Pixel dimensions of a tile, ordered as y, x. If this is not provided, it defaults to
`default_tile_shape` in the `Tile Set`_). If neither is provided, the tile shape is
tile_shape dict No Mapping from the pixel dimensions to their sizes. If this is not provided, it defaults
to `default_tile_shape` in the `Tile Set`_. If neither is provided, the tile shape is
inferred from the actual file.
tile_format string No File format of the tile. If this is not provided, it defaults to `default_tile_format`
in the `Tile Set`_. If neither is provided, the tile format is inferred from actual
Expand Down
14 changes: 14 additions & 0 deletions slicedimage/_augmentedenum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from enum import Enum


class AugmentedEnum(Enum):
    """Enum base class whose members hash, compare, and print as their values.

    A member is equal to another member of the same enum type when their
    values match, and equal to any other object whose ``str()`` matches the
    member's value.  Because the hash is the hash of the value, a member and
    its value can be used interchangeably as dictionary keys.
    """

    def __hash__(self):
        """Return the hash of the member's value."""
        return hash(self.value)

    def __eq__(self, other):
        """Compare by value against a sibling member, else against ``str(other)``."""
        comparand = other.value if isinstance(other, type(self)) else str(other)
        return self.value == comparand

    def __str__(self):
        """Return the member's value itself as its string form."""
        return self.value
6 changes: 6 additions & 0 deletions slicedimage/_dimensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ._augmentedenum import AugmentedEnum


# Names of the x and y dimensions.  Elsewhere in this change the members are
# used as the keys of dict-style tile shapes (Y and X mapped to their sizes).
class DimensionNames(AugmentedEnum):
# Each member's value is the lowercase dimension name.
X = "x"
Y = "y"
32 changes: 24 additions & 8 deletions slicedimage/_tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,42 @@

import warnings

from ._typeformatting import format_tile_coordinates, format_tile_indices
from ._dimensions import DimensionNames
from ._typeformatting import format_enum_keyed_dicts, format_tile_coordinates


class Tile(object):
def __init__(self, coordinates, indices, tile_shape=None, sha256=None, extras=None):
self.coordinates = format_tile_coordinates(coordinates)
self.indices = format_tile_indices(indices)
self._tile_shape = tuple(tile_shape) if tile_shape is not None else None
self.indices = format_enum_keyed_dicts(indices)
self._tile_shape = format_enum_keyed_dicts(tile_shape) if tile_shape is not None else None
self.sha256 = sha256
self.extras = {} if extras is None else extras

self._numpy_array = None
self._numpy_array_future = None

@staticmethod
def format_tuple_shape_to_dict_shape(tuple_shape):
    """Convert a tuple-style shape into a dict keyed by dimension name.

    The last two entries of ``tuple_shape`` are taken as the y and x sizes,
    in that order.  ``None`` is passed through unchanged.
    """
    if tuple_shape is not None:
        y_size, x_size = tuple_shape[-2], tuple_shape[-1]
        return {DimensionNames.Y: y_size, DimensionNames.X: x_size}
    return None

@staticmethod
def format_dict_shape_to_tuple_shape(dict_shape):
    """Convert a dict-style shape into a ``(y, x)`` tuple.

    The sizes are looked up under ``DimensionNames.Y`` and
    ``DimensionNames.X``.  ``None`` is passed through unchanged.
    """
    if dict_shape is not None:
        y_size = dict_shape[DimensionNames.Y]
        x_size = dict_shape[DimensionNames.X]
        return y_size, x_size
    return None

@property
def tile_shape(self):
if self._tile_shape is None:
warnings.warn(
"Decoding tile just to obtain shape. It is recommended to include the tile shape "
"in the tileset document to avoid this."
)
self._tile_shape = self._numpy_array_future().shape
shape = self._numpy_array_future().shape
self._tile_shape = Tile.format_tuple_shape_to_dict_shape(shape)

return self._tile_shape

Expand All @@ -35,19 +49,21 @@ def numpy_array(self):
result = self._numpy_array_future()

if self._tile_shape is not None:
assert self._tile_shape == result.shape
self._tile_shape = result.shape
assert Tile.format_dict_shape_to_tuple_shape(self._tile_shape) == result.shape
else:
self._tile_shape = Tile.format_tuple_shape_to_dict_shape(result.shape)

return result

@numpy_array.setter
def numpy_array(self, numpy_array):
if self._tile_shape is not None:
assert self._tile_shape == numpy_array.shape
assert Tile.format_dict_shape_to_tuple_shape(self._tile_shape) == numpy_array.shape
else:
self._tile_shape = Tile.format_tuple_shape_to_dict_shape(numpy_array.shape)

self._numpy_array = numpy_array
self._numpy_array_future = None
self._tile_shape = self._numpy_array.shape

def set_numpy_array_future(self, future):
"""
Expand Down
20 changes: 13 additions & 7 deletions slicedimage/_tileset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from ._typeformatting import format_tileset_dimensions, format_tileset_shape
from ._dimensions import DimensionNames
from ._typeformatting import (
format_enum_keyed_dicts,
format_tileset_dimensions,
format_tileset_shape,
)


class TileSet(object):
Expand All @@ -13,7 +18,8 @@ def __init__(
extras=None):
self.dimensions = format_tileset_dimensions(dimensions)
self.shape = format_tileset_shape(shape)
self.default_tile_shape = None if default_tile_shape is None else tuple(default_tile_shape)
self.default_tile_shape = (format_enum_keyed_dicts(default_tile_shape)
if default_tile_shape is not None else None)
self.default_tile_format = default_tile_format
self.extras = {} if extras is None else extras
self._tiles = []
Expand All @@ -24,7 +30,7 @@ def __repr__(self):
# get dimensions of optional shapes
attributes = [
"{k}: {v}".format(k=k, v=self.shape[k])
for k in self.dimensions - {'y', 'x'}
for k in self.dimensions - {DimensionNames.Y, DimensionNames.X}
if k in self.shape
]
xmin, xmax, ymin, ymax = float("inf"), float("-inf"), float("inf"), float("-inf")
Expand All @@ -39,10 +45,10 @@ def __repr__(self):
if shape is None:
shape = tile.tile_shape

xmin = min(xmin, shape[0])
xmax = max(xmax, shape[0])
ymin = min(ymin, shape[1])
ymax = max(ymax, shape[1])
xmin = min(xmin, shape[DimensionNames.X])
xmax = max(xmax, shape[DimensionNames.X])
ymin = min(ymin, shape[DimensionNames.Y])
ymax = max(ymax, shape[DimensionNames.Y])

if xmin == xmax:
attributes.append("x: {}".format(xmin))
Expand Down
4 changes: 2 additions & 2 deletions slicedimage/_typeformatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ def format_tile_coordinates(tile_dimensions):
return result


def format_tile_indices(tile_dimensions):
def format_enum_keyed_dicts(enum_keyed_dict):
"""
Given a dictionary mapping keys to values, where the keys may either be strings or enums,
return a new dictionary with the same contents, except the keys are converted to strings.
"""
result = dict()
for name, value in tile_dimensions.items():
for name, value in enum_keyed_dict.items():
result[_str_or_enum_to_str(name)] = value
return result

Expand Down
162 changes: 156 additions & 6 deletions slicedimage/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
from packaging import version
from six.moves import urllib

from slicedimage.urlpath import pathjoin, pathsplit
from .backends import CachingBackend, DiskBackend, HttpBackend, SIZE_LIMIT
from .urlpath import pathjoin, pathsplit
from ._collection import Collection
from ._formats import ImageFormat
from ._tile import Tile
from ._tileset import TileSet
from ._typeformatting import format_enum_keyed_dicts


def infer_backend(baseurl, backend_config=None):
Expand Down Expand Up @@ -118,7 +119,9 @@ def parse_doc(name_or_url, baseurl, backend_config=None):
doc_version = version.parse(json_doc[CommonPartitionKeys.VERSION])

try:
if doc_version >= version.parse(v0_0_0.VERSION):
if doc_version >= version.parse(v1_0_0.VERSION):
parser = v1_0_0.Reader()
elif doc_version >= version.parse(v0_0_0.VERSION):
parser = v0_0_0.Reader()
else:
raise ValueError("Unrecognized version number")
Expand All @@ -134,8 +137,10 @@ def parse(self, json_doc, baseurl, backend_config):

class Writer(object):
@staticmethod
def write_to_path(partition, path, pretty=False, *args, **kwargs):
document = v0_0_0.Writer().generate_partition_document(
def write_to_path(partition, path, pretty=False, version_class=None, *args, **kwargs):
if version_class is None:
version_class = v1_0_0
document = version_class.Writer().generate_partition_document(
partition, path, pretty, *args, **kwargs)
indent = 4 if pretty else None
with open(path, "w") as fh:
Expand Down Expand Up @@ -182,6 +187,151 @@ def parse(self, json_doc, baseurl, backend_config):
imageformat = json_doc.get(TileSetKeys.DEFAULT_TILE_FORMAT, None)
if imageformat is not None:
imageformat = ImageFormat[imageformat]

result = TileSet(
tuple(json_doc[TileSetKeys.DIMENSIONS]),
json_doc[TileSetKeys.SHAPE],
Tile.format_tuple_shape_to_dict_shape(
json_doc.get(TileSetKeys.DEFAULT_TILE_SHAPE, None)),
imageformat,
json_doc.get(TileSetKeys.EXTRAS, None),
)

for tile_doc in json_doc[TileSetKeys.TILES]:
relative_path_or_url = tile_doc[TileKeys.FILE]
backend, name, _ = resolve_url(relative_path_or_url, baseurl, backend_config)

tile_format_str = tile_doc.get(TileKeys.TILE_FORMAT, None)
if tile_format_str:
tile_format = ImageFormat[tile_format_str]
else:
tile_format = result.default_tile_format
if tile_format is None:
# Still none :(
extension = os.path.splitext(name)[1].lstrip(".")
tile_format = ImageFormat.find_by_extension(extension)
checksum = tile_doc.get(TileKeys.SHA256, None)
tile = Tile(
tile_doc[TileKeys.COORDINATES],
tile_doc[TileKeys.INDICES],
tile_shape=Tile.format_tuple_shape_to_dict_shape(
tile_doc.get(TileKeys.TILE_SHAPE, None)),
sha256=checksum,
extras=tile_doc.get(TileKeys.EXTRAS, None),
)

def future_maker(_source_fh_contextmanager, _tile_format):
"""Produces a future that reads from a file and decodes according to the
specified file format."""
def _actual_future():
with _source_fh_contextmanager as fh:
return _tile_format.reader_func(fh)

return _actual_future

tile.set_numpy_array_future(
future_maker(
backend.read_contextmanager(name, checksum_sha256=checksum),
tile_format))
result.add_tile(tile)
else:
raise ValueError(
"JSON doc does not appear to be a collection partition or a tileset "
"partition. JSON doc must contain either a {contents} field pointing to a "
"tile manifest, or it must contain a {tiles} field that specifies a set of "
"tiles.".format(
contents=CollectionKeys.CONTENTS, tiles=TileSetKeys.TILES))

return result

# Writer for the v0_0_0 document layout.  It stamps documents with
# v0_0_0.VERSION and converts dict-style tile shapes back to tuples via
# Tile.format_dict_shape_to_tuple_shape before emitting them.
class Writer(Writer):
# Build and return the JSON-serializable document for `partition`.
# As a side effect, child partitions of a Collection are written to their own
# paths, and every tile of a TileSet is encoded and written through
# `tile_opener`.
def generate_partition_document(
self,
partition,
path,
pretty=False,
partition_path_generator=Writer.default_partition_path_generator,
tile_opener=Writer.default_tile_opener,
tile_format=ImageFormat.NUMPY,
):
# Fields set for both partition kinds: the format version and the extras.
json_doc = {
CommonPartitionKeys.VERSION: v0_0_0.VERSION,
CommonPartitionKeys.EXTRAS: partition.extras,
}
# Collection: write each child partition to the path produced by
# `partition_path_generator`, explicitly requesting the v0_0_0 writer, and
# record the basename of that path under the child's name.
if isinstance(partition, Collection):
json_doc[CollectionKeys.CONTENTS] = dict()
# NOTE: the loop variable rebinds `partition` to each child partition.
for partition_name, partition in partition._partitions.items():
partition_path = partition_path_generator(path, partition_name)
Writer.write_to_path(
partition, partition_path, pretty,
version_class=v0_0_0,
partition_path_generator=partition_path_generator,
tile_opener=tile_opener,
tile_format=tile_format,
)
json_doc[CollectionKeys.CONTENTS][partition_name] = os.path.basename(
partition_path)
return json_doc
# TileSet: record dimensions and shape, the optional defaults, and then one
# entry per tile.
elif isinstance(partition, TileSet):
json_doc[TileSetKeys.DIMENSIONS] = tuple(partition.dimensions)
json_doc[TileSetKeys.SHAPE] = partition.shape
json_doc[TileSetKeys.TILES] = []

# The default tile shape is held as a dict; emit it in tuple form.
if partition.default_tile_shape is not None:
json_doc[TileSetKeys.DEFAULT_TILE_SHAPE] = \
Tile.format_dict_shape_to_tuple_shape(partition.default_tile_shape)
if partition.default_tile_format is not None:
json_doc[TileSetKeys.DEFAULT_TILE_FORMAT] = partition.default_tile_format.name
if len(partition.extras) != 0:
json_doc[TileSetKeys.EXTRAS] = partition.extras

for tile in partition._tiles:
tiledoc = {
TileKeys.COORDINATES: tile.coordinates,
TileKeys.INDICES: tile.indices,
}

# Encode the tile into an in-memory buffer first, so that the SHA-256 is
# computed over the same bytes that are then copied into the tile file.
# tile.sha256 is overwritten with the new digest as a side effect.
with tile_opener(path, tile, tile_format.file_ext) as tile_fh:
buffer_fh = BytesIO()
tile.write(buffer_fh, tile_format)

buffer_fh.seek(0)
tile.sha256 = hashlib.sha256(buffer_fh.getvalue()).hexdigest()

buffer_fh.seek(0)
tile_fh.write(buffer_fh.read())
# Only the basename of the written tile file is stored in the document.
tiledoc[TileKeys.FILE] = os.path.basename(tile_fh.name)

# The per-tile shape is also emitted in tuple form.
if tile.tile_shape is not None:
tiledoc[TileKeys.TILE_SHAPE] = \
Tile.format_dict_shape_to_tuple_shape(tile.tile_shape)
tiledoc[TileKeys.SHA256] = tile.sha256
# NOTE(review): tile_format.file_ext was already dereferenced above, so this
# None check looks redundant -- confirm.
if tile_format is not None:
tiledoc[TileKeys.TILE_FORMAT] = tile_format.name
if len(tile.extras) != 0:
tiledoc[TileKeys.EXTRAS] = tile.extras
json_doc[TileSetKeys.TILES].append(tiledoc)

return json_doc


class v1_0_0(object):
VERSION = "0.1.0"

class Reader(Reader):
def parse(self, json_doc, baseurl, backend_config):
if CollectionKeys.CONTENTS in json_doc:
# this is a Collection
result = Collection(json_doc.get(CommonPartitionKeys.EXTRAS, None))
for name, relative_path_or_url in json_doc[CollectionKeys.CONTENTS].items():
collection = Reader.parse_doc(relative_path_or_url, baseurl, backend_config)
collection._name_or_url = relative_path_or_url
result.add_partition(name, collection)
elif TileSetKeys.TILES in json_doc:
imageformat = json_doc.get(TileSetKeys.DEFAULT_TILE_FORMAT, None)
if imageformat is not None:
imageformat = ImageFormat[imageformat]

result = TileSet(
tuple(json_doc[TileSetKeys.DIMENSIONS]),
json_doc[TileSetKeys.SHAPE],
Expand Down Expand Up @@ -247,7 +397,7 @@ def generate_partition_document(
tile_format=ImageFormat.NUMPY,
):
json_doc = {
CommonPartitionKeys.VERSION: v0_0_0.VERSION,
CommonPartitionKeys.VERSION: v1_0_0.VERSION,
CommonPartitionKeys.EXTRAS: partition.extras,
}
if isinstance(partition, Collection):
Expand Down Expand Up @@ -293,7 +443,7 @@ def generate_partition_document(
tiledoc[TileKeys.FILE] = os.path.basename(tile_fh.name)

if tile.tile_shape is not None:
tiledoc[TileKeys.TILE_SHAPE] = tile.tile_shape
tiledoc[TileKeys.TILE_SHAPE] = format_enum_keyed_dicts(tile.tile_shape)
tiledoc[TileKeys.SHA256] = tile.sha256
if tile_format is not None:
tiledoc[TileKeys.TILE_FORMAT] = tile_format.name
Expand Down
Loading

0 comments on commit a61ab25

Please sign in to comment.