Skip to content

Commit

Permalink
feature/tiledtiffreader (#387)
Browse files Browse the repository at this point in the history
* Prototype of bfio tiled tiff image reader

* Minimal bfio OME Tiled Tiff Reader

* Fixed code to pass CI checks

* Made tiled tiff reader serializable, added tests/benchmarks

* Fixed bug in output dimension ordering

* Finalize and cleanup

* Fixed bfio version

* Removed parameter from unit test

* Cleaned up documentation. Fast fail on multi-image files. Fail on non-local fs.

* Updated resource hash

* Added documentation, unit test for selected tiff reader
  • Loading branch information
Nicholas-Schaub authored Apr 13, 2022
1 parent f5b8803 commit 4bace71
Show file tree
Hide file tree
Showing 9 changed files with 562 additions and 2 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ optionally installed using `[...]` syntax.
- For a single additional supported format (e.g. ND2): `pip install aicsimageio[nd2]`
- For a single additional supported format (e.g. ND2), development head: `pip install "aicsimageio[nd2] @ git+https://github.com/AllenCellModeling/aicsimageio.git"`
- For a single additional supported format (e.g. ND2), specific tag (e.g. `v4.0.0.dev6`): `pip install "aicsimageio[nd2] @ git+https://github.com/AllenCellModeling/aicsimageio.git@v4.0.0.dev6"`
- For faster OME-TIFF reading with tile tags: `pip install aicsimageio[bfio]`
- For multiple additional supported formats: `pip install aicsimageio[base-imageio,nd2]`
- For all additional supported (and openly licensed) formats: `pip install aicsimageio[all]`
- Due to the GPL license, LIF support is not included with the `[all]` extra, and must be installed manually with `pip install aicsimageio "readlif>=0.6.4"`
Expand Down Expand Up @@ -338,4 +339,4 @@ _Free software: BSD-3-Clause_

_(The LIF component is licensed under GPLv3 and is not included in this package)_
_(The Bio-Formats component is licensed under GPLv2 and is not included in this package)_
_(The CZI component is licensed under GPLv3 and is not included in this package)_
_(The CZI component is licensed under GPLv3 and is not included in this package)_
2 changes: 2 additions & 0 deletions aicsimageio/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,12 +326,14 @@
"oir": ["aicsimageio.readers.bioformats_reader.BioformatsReader"],
"ome": ["aicsimageio.readers.bioformats_reader.BioformatsReader"],
"ome.tif": [
"aicsimageio.readers.bfio_reader.OmeTiledTiffReader",
"aicsimageio.readers.ome_tiff_reader.OmeTiffReader",
"aicsimageio.readers.tiff_reader.TiffReader",
"aicsimageio.readers.bioformats_reader.BioformatsReader",
"aicsimageio.readers.default_reader.DefaultReader",
],
"ome.tiff": [
"aicsimageio.readers.bfio_reader.OmeTiledTiffReader",
"aicsimageio.readers.ome_tiff_reader.OmeTiffReader",
"aicsimageio.readers.tiff_reader.TiffReader",
"aicsimageio.readers.bioformats_reader.BioformatsReader",
Expand Down
2 changes: 2 additions & 0 deletions aicsimageio/readers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

if TYPE_CHECKING:
from .array_like_reader import ArrayLikeReader # noqa: F401
from .bfio_reader import OmeTiledTiffReader # noqa: F401
from .bioformats_reader import BioformatsReader # noqa: F401
from .czi_reader import CziReader # noqa: F401
from .dv_reader import DVReader # noqa: F401
Expand All @@ -21,6 +22,7 @@
# add ".relativepath.ClassName"
_READERS = (
".array_like_reader.ArrayLikeReader",
".bfio_reader.OmeTiledTiffReader",
".bioformats_reader.BioformatsReader",
".czi_reader.CziReader",
".dv_reader.DVReader",
Expand Down
293 changes: 293 additions & 0 deletions aicsimageio/readers/bfio_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
from typing import Any, Dict, List, Optional, Tuple, Union

import dask.array as da
import xarray as xr
from bfio import BioReader
from fsspec.implementations.local import LocalFileSystem
from fsspec.spec import AbstractFileSystem
from ome_types import OME
from tifffile.tifffile import TiffFileError, TiffTags

from .. import constants, exceptions, transforms, types
from ..dimensions import DEFAULT_DIMENSION_ORDER
from ..exceptions import UnsupportedFileFormatError
from ..metadata import utils as metadata_utils
from ..types import PhysicalPixelSizes
from ..utils import io_utils
from .reader import Reader

###############################################################################

log = logging.getLogger(__name__)

###############################################################################


class BfioReader(Reader):
    """
    Abstract bfio reader to utilize optimized readers for ome tiled tiffs and ome zarr.

    Parameters
    ----------
    image: types.PathLike
        Path to image file. Must resolve to a local file system.
    chunk_dims: Optional[Union[str, List[str]]]
        Currently unsupported. Any value other than None only triggers a warning;
        the delayed array is always chunked as 1024 x 1024 planes.
        Default: None
    out_order: str
        The output dimension ordering. Must contain every dimension in "YXZCT".
        Default: DEFAULT_DIMENSION_ORDER

    Raises
    ------
    ValueError
        The image is not on a local file system, or out_order is missing one of
        the dimensions in "YXZCT".
    exceptions.UnsupportedFileFormatError
        bfio could not open the file with the configured backend.

    Notes
    -----
    If the OME metadata in your file isn't OME schema compliant or does not validate
    this will fail to read your file and raise an exception.
    If the OME metadata in your file doesn't use the latest OME schema (2016-06),
    this reader will make a request to the referenced remote OME schema to validate.
    """

    # Name of the bfio backend handed to BioReader; subclasses override this.
    backend: Optional[str] = None

    def _general_data_array_constructor(
        self,
        image_data: types.ArrayLike,
        tiff_tags: Optional[Union[TiffTags, Dict[int, Any]]] = None,
    ) -> xr.DataArray:
        """
        Wrap image data in an xarray.DataArray using this reader's OME metadata.

        Parameters
        ----------
        image_data: types.ArrayLike
            Array in ``self.native_dim_order`` layout (in-memory or dask).
        tiff_tags: Optional[Union[TiffTags, Dict[int, Any]]]
            Tags to store as unprocessed metadata. Omitted from attrs when None.

        Returns
        -------
        xr.DataArray
            The data reshaped to ``self.out_dim_order`` with coords and attrs set.
        """
        # Unpack coords from OME; only the first scene is ever read
        _, coords = metadata_utils.get_dims_and_coords_from_ome(
            ome=self._rdr.metadata,
            scene_index=0,
        )

        # Keep only coords for output dimensions, in output order
        coords = {d: coords[d] for d in self.out_dim_order if d in coords}
        image_data = transforms.reshape_data(
            image_data, self.native_dim_order, "".join(self.out_dim_order)
        )

        attrs = {constants.METADATA_PROCESSED: self._rdr.metadata}

        if tiff_tags is not None:
            attrs[constants.METADATA_UNPROCESSED] = tiff_tags

        return xr.DataArray(
            image_data,
            dims=self.out_dim_order,
            coords=coords,  # type: ignore
            attrs=attrs,
        )

    @staticmethod
    def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> bool:
        """This method should be overwritten by a subclass."""
        # Deliberately best-effort: any failure to open means "not supported".
        try:
            with BioReader(path):
                return True

        except Exception:
            return False

    def __init__(
        self,
        image: types.PathLike,
        chunk_dims: Optional[Union[str, List[str]]] = None,
        out_order: str = DEFAULT_DIMENSION_ORDER,
        **kwargs: Any,
    ):
        # Expand details of provided image
        self._fs, self._path = io_utils.pathlike_to_fs(image, enforce_exists=True)

        if not isinstance(self._fs, LocalFileSystem):
            raise ValueError(
                "Cannot read .ome.tif from non-local file system. "
                f"Received URI: {self._path}, which points to {type(self._fs)}."
            )

        try:
            self._rdr = BioReader(self._path, backend=self.backend)
        except (TypeError, ValueError, TiffFileError) as err:
            # Chain the cause so the underlying bfio/tifffile failure stays visible
            raise exceptions.UnsupportedFileFormatError(
                self.__class__.__name__, self._path
            ) from err

        # Add ndim attribute so _rdr can be passed directly to dask
        self._rdr.ndim = len(self._rdr.shape)

        # Setup dimension ordering
        dims = "YXZCT"
        self.native_dim_order = dims[: len(self._rdr.shape)]

        # Explicit check instead of `assert`, which is stripped under `python -O`
        missing = [d for d in dims if d not in out_order]
        if missing:
            raise ValueError(
                f"out_order must contain all of the dimensions '{dims}'. "
                f"Received: '{out_order}', missing: {missing}."
            )
        self.out_dim_order = [d for d in out_order if d in dims]

        # Currently do not support custom chunking, throw a warning.
        if chunk_dims is not None:
            log.warning(
                "%s does not currently support custom chunking.",
                self.__class__.__name__,
            )

    @property
    def scenes(self) -> Tuple[str, ...]:
        """Ids of every image listed in the OME metadata."""
        return tuple(image_meta.id for image_meta in self._rdr.metadata.images)

    @property
    def current_scene(self) -> str:
        """Id of the scene at the current scene index."""
        return self.scenes[self._current_scene_index]

    @property
    def current_scene_index(self) -> int:
        """Index of the current scene."""
        return self._current_scene_index

    def set_scene(self, scene_id: Union[str, int]) -> None:
        """
        For all BfioReader subclasses, the only allowed value is the name of the first
        scene since only the first scene can be read by BioReader objects. This method
        exists primarily to help this Reader fit into existing unit test templates and
        in case BioReader is updated to support multiple scenes.

        Parameters
        ----------
        scene_id: Union[str, int]
            The scene id (if string) or scene index (if integer)
            to set as the operating scene.

        Raises
        ------
        IndexError
            The provided scene id or index does not reference the first scene.
        TypeError
            The provided value wasn't a string (scene id) or integer (scene index).
        """
        if not isinstance(scene_id, (str, int)):
            raise TypeError(
                "Must provide either a string (for scene id) "
                "or integer (for scene index). "
                f"Provided: {scene_id} ({type(scene_id)})."
            )

        # Anything other than the current scene would require a scene change,
        # which this reader cannot perform.
        if scene_id not in (self.current_scene, self.current_scene_index):
            raise IndexError(
                "Scene id: Cannot change scene for "
                + f"{self.__class__.__name__} objects."
            )

    @property
    def channel_names(self) -> Optional[List[str]]:
        """Channel names as reported by the underlying BioReader."""
        return self._rdr.channel_names

    @property
    def physical_pixel_sizes(self) -> PhysicalPixelSizes:
        """Physical pixel sizes in (Z, Y, X) order."""
        # presumably ps_* are (value, unit) pairs and only the value is kept
        # — TODO confirm against bfio
        return types.PhysicalPixelSizes(
            self._rdr.ps_z[0],
            self._rdr.ps_y[0],
            self._rdr.ps_x[0],
        )

    def _read_delayed(self) -> xr.DataArray:
        """Build a dask-backed DataArray that reads from the BioReader lazily."""
        # 1024 x 1024 chunks in the first two axes, one plane per chunk elsewhere
        chunks = (1024, 1024) + (1,) * (self._rdr.ndim - 2)
        return self._general_data_array_constructor(
            da.from_array(self._rdr, chunks=chunks),
            self._tiff_tags(),
        )

    def _tiff_tags(self) -> Optional[Dict[int, Any]]:
        """
        Copy the tags of the first TIFF page into a plain dict.

        Returns
        -------
        Optional[Dict[int, Any]]
            Tag values keyed by the key yielded by tifffile (presumably the numeric
            tag code), or None when the backend is not "python".
        """
        if self.backend != "python":
            return None

        # Create a copy since TiffTags are not serializable
        return {
            code: tag.value
            for code, tag in self._rdr._backend._rdr.pages[0].tags.items()
        }

    def _read_immediate(self) -> xr.DataArray:
        """Read the whole image into memory and wrap it in a DataArray."""
        return self._general_data_array_constructor(
            self._rdr[:],
            self._tiff_tags(),
        )

    @property
    def ome_metadata(self) -> OME:
        """The OME metadata object exposed by the underlying BioReader."""
        return self._rdr.metadata


class OmeTiledTiffReader(BfioReader):
    """
    Wrapper around bfio.BioReader(backend="python").

    The OmeTiledTiffReader is an optimized TIFF reader written in pure Python, built on
    top of tifffile. This reader is optimized for speed and scalability, but will only
    read TIFF files that meet the following requirements:

    1. TileWidth and TileHeight tags must both be set to 1024
    2. The Description tag must contain OMEXML
    3. The OMEXML channel ordering must be set to XYZCT
    4. Channels cannot be interleaved, meaning individual channels must be planes.

    The advantage of the reader for files that meet these requirements are improvements
    in reading speed, especially when accessing data using dask.

    This TIFF reader will only read the first image and pyramid layer. If pyramid layers
    or images beyond the first image in the file need to be read, use the OmeTiffReader.

    Parameters
    ----------
    image: types.PathLike
        Path to image file. Must resolve to a local file system.
    chunk_dims: Optional[Union[str, List[str]]]
        Currently unsupported; custom chunking is not applied.
        Default: None
    out_order: str
        The output dimension ordering.
        Default: DEFAULT_DIMENSION_ORDER

    Notes
    -----
    If the OME metadata in your file isn't OME schema compliant or does not validate
    this will fail to read your file and raise an exception.
    If the OME metadata in your file doesn't use the latest OME schema (2016-06),
    this reader will make a request to the referenced remote OME schema to validate.
    """

    backend: str = "python"

    @staticmethod
    def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> bool:
        """
        Check whether the bfio "python" backend can open the file.

        Parameters
        ----------
        fs: AbstractFileSystem
            File system the path belongs to. Only local file systems are supported.
        path: str
            Path to the candidate image.

        Returns
        -------
        bool
            True when the file opens and holds a single image; False when the file
            system is not local or the file cannot be opened.

        Raises
        ------
        UnsupportedFileFormatError
            The file opens but contains more than one image (fail fast).
        """
        if not isinstance(fs, LocalFileSystem):
            return False

        try:
            with BioReader(path, backend="python") as br:

                # Fail fast if multi-image file
                if len(br.metadata.images) > 1:
                    # NOTE(review): argument order follows the (reader_name, path)
                    # call in BfioReader.__init__; the third positional argument is
                    # assumed to be the exception's extra-message slot — confirm
                    # against exceptions.UnsupportedFileFormatError.
                    raise UnsupportedFileFormatError(
                        "OmeTiledTiffReader",
                        path,
                        "This file contains more than one scene and only the first "
                        + "scene can be read by the OmeTiledTiffReader. "
                        + "To read additional scenes, use the TiffReader, "
                        + "OmeTiffReader, or BioformatsReader.",
                    )

                return True

        # Same open failures that BfioReader.__init__ treats as "unsupported"
        except (TypeError, ValueError, TiffFileError):
            return False
Loading

0 comments on commit 4bace71

Please sign in to comment.