-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Prototype of bfio tiled tiff image reader * Minimal bfio OME Tiled Tiff Reader * Fixed code to pass CI checks * Made tiled tiff reader serializable, added tests/benchmarks * Fixed bug in output dimension ordering * Finalize and cleanup * Fixed bfio version * Removed parameter from unit test * Cleaned up documentation. Fast fail on multi-image files. Fail on non-local fs. * Updated resource hash * Added documentation, unit test for selected tiff reader
- Loading branch information
1 parent
f5b8803
commit 4bace71
Showing
9 changed files
with
562 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,7 @@ optionally installed using `[...]` syntax. | |
- For a single additional supported format (e.g. ND2): `pip install aicsimageio[nd2]` | ||
- For a single additional supported format (e.g. ND2), development head: `pip install "aicsimageio[nd2] @ git+https://github.com/AllenCellModeling/aicsimageio.git"` | ||
- For a single additional supported format (e.g. ND2), specific tag (e.g. `v4.0.0.dev6`): `pip install "aicsimageio[nd2] @ git+https://github.com/AllenCellModeling/[email protected]"` | ||
- For faster OME-TIFF reading with tile tags: `pip install aicsimageio[bfio]` | ||
- For multiple additional supported formats: `pip install aicsimageio[base-imageio,nd2]` | ||
- For all additional supported (and openly licensed) formats: `pip install aicsimageio[all]` | ||
- Due to the GPL license, LIF support is not included with the `[all]` extra, and must be installed manually with `pip install aicsimageio readlif>=0.6.4` | ||
|
@@ -338,4 +339,4 @@ _Free software: BSD-3-Clause_ | |
|
||
_(The LIF component is licensed under GPLv3 and is not included in this package)_ | ||
_(The Bio-Formats component is licensed under GPLv2 and is not included in this package)_ | ||
_(The CZI component is licensed under GPLv3 and is not included in this package)_ | ||
_(The CZI component is licensed under GPLv3 and is not included in this package)_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,293 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
import logging | ||
from typing import Any, Dict, List, Optional, Tuple, Union | ||
|
||
import dask.array as da | ||
import xarray as xr | ||
from bfio import BioReader | ||
from fsspec.implementations.local import LocalFileSystem | ||
from fsspec.spec import AbstractFileSystem | ||
from ome_types import OME | ||
from tifffile.tifffile import TiffFileError, TiffTags | ||
|
||
from .. import constants, exceptions, transforms, types | ||
from ..dimensions import DEFAULT_DIMENSION_ORDER | ||
from ..exceptions import UnsupportedFileFormatError | ||
from ..metadata import utils as metadata_utils | ||
from ..types import PhysicalPixelSizes | ||
from ..utils import io_utils | ||
from .reader import Reader | ||
|
||
############################################################################### | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
############################################################################### | ||
|
||
|
||
class BfioReader(Reader): | ||
""" | ||
Abstract bfio reader to utilize optimized readers for ome tiled tiffs and ome zarr. | ||
Parameters | ||
---------- | ||
image: types.PathLike | ||
Path to image file. | ||
chunk_dims: List[str] | ||
Which dimensions to create chunks for. | ||
Default: DEFAULT_CHUNK_DIMS | ||
Note: Dimensions.SpatialY, and Dimensions.SpatialX will always be added to the | ||
list if not present during dask array construction. | ||
out_order: List[str] | ||
The output dimension ordering. | ||
Default: DEFAULT_DIMENSION_ORDER | ||
Notes | ||
----- | ||
If the OME metadata in your file isn't OME schema compliant or does not validate | ||
this will fail to read your file and raise an exception. | ||
If the OME metadata in your file doesn't use the latest OME schema (2016-06), | ||
this reader will make a request to the referenced remote OME schema to validate. | ||
""" | ||
|
||
backend: Optional[str] = None | ||
|
||
def _general_data_array_constructor( | ||
self, | ||
image_data: types.ArrayLike, | ||
tiff_tags: Optional[TiffTags] = None, | ||
) -> xr.DataArray: | ||
|
||
# Unpack dims and coords from OME | ||
_, coords = metadata_utils.get_dims_and_coords_from_ome( | ||
ome=self._rdr.metadata, | ||
scene_index=0, | ||
) | ||
|
||
coords = {d: coords[d] for d in self.out_dim_order if d in coords} | ||
image_data = transforms.reshape_data( | ||
image_data, self.native_dim_order, "".join(self.out_dim_order) | ||
) | ||
|
||
attrs = {constants.METADATA_PROCESSED: self._rdr.metadata} | ||
|
||
if tiff_tags is not None: | ||
attrs[constants.METADATA_UNPROCESSED] = tiff_tags | ||
|
||
return xr.DataArray( | ||
image_data, | ||
dims=self.out_dim_order, | ||
coords=coords, # type: ignore | ||
attrs=attrs, | ||
) | ||
|
||
@staticmethod | ||
def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> bool: | ||
"""This method should be overwritten by a subclass.""" | ||
try: | ||
with BioReader(path): | ||
|
||
return True | ||
|
||
except Exception: | ||
|
||
return False | ||
|
||
def __init__( | ||
self, | ||
image: types.PathLike, | ||
chunk_dims: Optional[Union[str, List[str]]] = None, | ||
out_order: str = DEFAULT_DIMENSION_ORDER, | ||
**kwargs: Any, | ||
): | ||
# Expand details of provided image | ||
self._fs, self._path = io_utils.pathlike_to_fs(image, enforce_exists=True) | ||
|
||
if not isinstance(self._fs, LocalFileSystem): | ||
raise ValueError( | ||
"Cannot read .ome.tif from non-local file system. " | ||
f"Received URI: {self._path}, which points to {type(self._fs)}." | ||
) | ||
|
||
try: | ||
self._rdr = BioReader(self._path, backend=self.backend) | ||
except (TypeError, ValueError, TiffFileError): | ||
raise exceptions.UnsupportedFileFormatError( | ||
self.__class__.__name__, self._path | ||
) | ||
|
||
# Add ndim attribute so _rdr can be passed directly to dask | ||
self._rdr.ndim = len(self._rdr.shape) | ||
|
||
# Setup dimension ordering | ||
dims = "YXZCT" | ||
self.native_dim_order = dims[: len(self._rdr.shape)] | ||
assert all(d in out_order for d in dims) | ||
self.out_dim_order = [d for d in out_order if d in dims] | ||
|
||
# Currently do not support custom chunking, throw a warning. | ||
if chunk_dims is not None: | ||
log.warning( | ||
"OmeTiledTiffReader does not currently support custom chunking." | ||
) | ||
|
||
@property | ||
def scenes(self) -> Tuple[str, ...]: | ||
return tuple(image_meta.id for image_meta in self._rdr.metadata.images) | ||
|
||
@property | ||
def current_scene(self) -> str: | ||
return self.scenes[self._current_scene_index] | ||
|
||
@property | ||
def current_scene_index(self) -> int: | ||
return self._current_scene_index | ||
|
||
def set_scene(self, scene_id: Union[str, int]) -> None: | ||
""" | ||
For all BfioReader subclasses, the only allowed value is the name of the first | ||
scene since only the first scene can be read by BioReader objects. This method | ||
exists primarily to help this Reader fit into existing unit test templates and | ||
in case BioReader is updated to support multiple scenes. | ||
Parameters | ||
---------- | ||
scene_id: Union[str, int] | ||
The scene id (if string) or scene index (if integer) | ||
to set as the operating scene. | ||
Raises | ||
------ | ||
IndexError | ||
The provided scene id or index does not reference the first scene. | ||
TypeError | ||
The provided value wasn't a string (scene id) or integer (scene index). | ||
""" | ||
# Route to int or str setting | ||
if isinstance(scene_id, (str, int)): | ||
# Only need to run when the scene id is different from current scene | ||
if scene_id not in (self.current_scene, self.current_scene_index): | ||
|
||
raise IndexError( | ||
"Scene id: Cannot change scene for " | ||
+ f"{self.__class__.__name__} objects." | ||
) | ||
|
||
else: | ||
raise TypeError( | ||
f"Must provide either a string (for scene id) " | ||
f"or integer (for scene index). Provided: {scene_id} ({type(scene_id)}." | ||
) | ||
|
||
@property | ||
def channel_names(self) -> Optional[List[str]]: | ||
|
||
return self._rdr.channel_names | ||
|
||
@property | ||
def physical_pixel_sizes(self) -> PhysicalPixelSizes: | ||
return types.PhysicalPixelSizes( | ||
self._rdr.ps_z[0], | ||
self._rdr.ps_y[0], | ||
self._rdr.ps_x[0], | ||
) | ||
|
||
def _read_delayed(self) -> xr.DataArray: | ||
|
||
return self._general_data_array_constructor( | ||
da.from_array(self._rdr, chunks=(1024, 1024) + (1,) * (self._rdr.ndim - 2)), | ||
self._tiff_tags(), | ||
) | ||
|
||
def _tiff_tags(self) -> Optional[Dict[str, str]]: | ||
|
||
tiff_tags: Optional[Dict[str, str]] = None | ||
if self.backend == "python": | ||
# Create a copy since TiffTags are not serializable | ||
tiff_tags = { | ||
code: tag.value | ||
for code, tag in self._rdr._backend._rdr.pages[0].tags.items() | ||
} | ||
|
||
return tiff_tags | ||
|
||
def _read_immediate(self) -> xr.DataArray: | ||
return self._general_data_array_constructor( | ||
self._rdr[:], | ||
self._tiff_tags(), | ||
) | ||
|
||
@property | ||
def ome_metadata(self) -> OME: | ||
return self._rdr.metadata | ||
|
||
|
||
class OmeTiledTiffReader(BfioReader): | ||
""" | ||
Wrapper around bfio.BioReader(backend="python"). | ||
The OmeTiledTiffReader is an optimized TIFF reader written in pure Python, built on | ||
top of tifffile. This reader is optimized for speed and scalability, but will only | ||
read TIFF files that meet the following requirements: | ||
1. TileWidth and TileHeight tags must both be set to 1024 | ||
2. The Description tag must contain OMEXML | ||
3. The OMEXML channel ordering must be set to XYZCT | ||
4. Channels cannot be interleaved, meaning individual channels must be planes. | ||
The advantage of the reader for files that meet these requirements are improvements | ||
in reading speed, especially when accessing data using dask. | ||
This TIFF reader will only read the first image and pyramid layer. If pyramid layers | ||
or images beyond the first image in the file need to be read, use the OmeTiffReader. | ||
Parameters | ||
---------- | ||
image: types.PathLike | ||
Path to image file. | ||
chunk_dims: List[str] | ||
Which dimensions to create chunks for. | ||
Default: DEFAULT_CHUNK_DIMS | ||
Note: Dimensions.SpatialY, and Dimensions.SpatialX will always be added to the | ||
list if not present during dask array construction. | ||
out_order: List[str] | ||
The output dimension ordering. | ||
Default: DEFAULT_DIMENSION_ORDER | ||
Notes | ||
----- | ||
If the OME metadata in your file isn't OME schema compliant or does not validate | ||
this will fail to read your file and raise an exception. | ||
If the OME metadata in your file doesn't use the latest OME schema (2016-06), | ||
this reader will make a request to the referenced remote OME schema to validate. | ||
""" | ||
|
||
backend: str = "python" | ||
|
||
@staticmethod | ||
def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> bool: | ||
try: | ||
if not isinstance(fs, LocalFileSystem): | ||
return False | ||
|
||
with BioReader(path, backend="python") as br: | ||
|
||
# Fail fast if multi-image file | ||
if len(br.metadata.images) > 1: | ||
raise UnsupportedFileFormatError( | ||
path, | ||
"This file contains more than one scene and only the first " | ||
+ "scene can be read by the OmeTiledTiffReader. " | ||
+ "To read additional scenes, use the TiffReader, " | ||
+ "OmeTiffReader, or BioformatsReader.", | ||
) | ||
|
||
return True | ||
|
||
# tifffile exceptions | ||
except (TypeError, ValueError): | ||
return False |
Oops, something went wrong.