From 77a9bdb430f9dc0269a11b95ed82878b981bf583 Mon Sep 17 00:00:00 2001 From: David Stirling Date: Fri, 1 Mar 2024 13:38:01 +0000 Subject: [PATCH 01/10] Revert "Merge branch 'compat-4-2' into 4.2.x" This reverts commit 2bb4b80f37990445922b98e699e5a5207696bd46, reversing changes made to 078fd111767ec693a88128219f3490832ea06fb5. --- cellprofiler_core/analysis/_runner.py | 3 +- cellprofiler_core/constants/image.py | 2 +- .../image/abstract_image/file/_file_image.py | 80 +---- .../abstract_image/file/url/_color_image.py | 4 +- .../abstract_image/file/url/_mask_image.py | 4 +- .../file/url/_monochrome_image.py | 4 - .../abstract_image/file/url/_url_image.py | 12 +- cellprofiler_core/modules/loaddata.py | 27 -- cellprofiler_core/modules/metadata.py | 8 +- cellprofiler_core/modules/namesandtypes.py | 35 +-- cellprofiler_core/preferences/__init__.py | 7 +- cellprofiler_core/utilities/image.py | 3 - cellprofiler_core/utilities/zarr.py | 279 ------------------ setup.py | 19 +- 14 files changed, 24 insertions(+), 463 deletions(-) delete mode 100644 cellprofiler_core/utilities/zarr.py diff --git a/cellprofiler_core/analysis/_runner.py b/cellprofiler_core/analysis/_runner.py index f0ed2ed7..efbee900 100644 --- a/cellprofiler_core/analysis/_runner.py +++ b/cellprofiler_core/analysis/_runner.py @@ -672,8 +672,7 @@ def start_workers(cls, num=None): # closed, the subprocess exits. 
if hasattr(sys, "frozen"): if sys.platform == "darwin": - executable = os.path.join(os.path.dirname(sys.executable), - "cellprofilerapp") + executable = os.path.join(os.path.dirname(sys.executable), "cp") args = [executable] + aw_args elif sys.platform.startswith("linux"): aw_path = os.path.join(os.path.dirname(__file__), "__init__.py") diff --git a/cellprofiler_core/constants/image.py b/cellprofiler_core/constants/image.py index a2f42e43..23eb9488 100644 --- a/cellprofiler_core/constants/image.py +++ b/cellprofiler_core/constants/image.py @@ -103,4 +103,4 @@ SUB_ALL = "All" SUB_SOME = "Some" FILE_SCHEME = "file:" -PASSTHROUGH_SCHEMES = ("http", "https", "ftp", "omero", "s3", "omero-3d") +PASSTHROUGH_SCHEMES = ("http", "https", "ftp", "omero", "s3") diff --git a/cellprofiler_core/image/abstract_image/file/_file_image.py b/cellprofiler_core/image/abstract_image/file/_file_image.py index e0dee2d0..4b1715d8 100644 --- a/cellprofiler_core/image/abstract_image/file/_file_image.py +++ b/cellprofiler_core/image/abstract_image/file/_file_image.py @@ -9,23 +9,16 @@ import numpy import skimage.io -import requests -from PIL import Image as PilImage -from io import BytesIO -from posixpath import join as urljoin - import cellprofiler_core.preferences from .._abstract_image import AbstractImage from ..._image import Image from ....utilities.image import is_numpy_file from ....utilities.image import is_matlab_file -from ....utilities.image import is_omero3d_path from ....utilities.image import loadmat from ....utilities.image import load_data_file from ....utilities.image import generate_presigned_url from ....constants.image import FILE_SCHEME, PASSTHROUGH_SCHEMES from ....utilities.pathname import pathname2url, url2pathname -from ....utilities.zarr import get_zarr_reader class FileImage(AbstractImage): @@ -42,8 +35,6 @@ def __init__( channel=None, volume=False, spacing=None, - z=None, - t=None, ): """ :param name: Name of image to be provided @@ -93,8 +84,6 @@ def __init__( 
self.__index = index self.__volume = volume self.__spacing = spacing - self.z_index = z if z is not None else 0 - self.t_index = t if t is not None else 0 self.scale = None @property @@ -187,8 +176,6 @@ def cache_file(self): ) finally: os.close(tempfd) - elif url.lower().endswith('.zarr'): - self.__cached_file = url else: from bioformats.formatreader import get_image_reader @@ -198,8 +185,6 @@ def cache_file(self): return True def get_full_name(self): - if is_omero3d_path(self.__url): - return self.get_url() self.cache_file() if self.__is_cached: return self.__cached_file @@ -218,12 +203,8 @@ def get_md5_hash(self, measurements): # # Cache the MD5 hash on the image reader # - if (is_matlab_file(self.__filename) or - is_numpy_file(self.__filename) or - is_omero3d_path(self.get_url())): + if is_matlab_file(self.__filename) or is_numpy_file(self.__filename): rdr = None - elif self.get_url().endswith('.zarr'): - rdr = get_zarr_reader(None, url=self.get_url()) else: from bioformats.formatreader import get_image_reader @@ -292,8 +273,6 @@ def __set_image(self): url = self.get_url() if url.lower().startswith("omero:"): rdr = get_image_reader(self.get_name(), url=url) - elif url.lower().endswith('.zarr'): - rdr = get_zarr_reader(self.get_name(), url=url) else: rdr = get_image_reader(self.get_name(), url=self.get_url()) if numpy.isscalar(self.index) or self.index is None: @@ -304,8 +283,6 @@ def __set_image(self): rescale=self.rescale if isinstance(self.rescale, bool) else False, wants_max_intensity=True, channel_names=channel_names, - z=self.z_index, - t=self.t_index, ) else: # It's a stack @@ -328,8 +305,6 @@ def __set_image(self): rescale=self.rescale if isinstance(self.rescale, bool) else False, wants_max_intensity=True, channel_names=channel_names, - z=self.z_index, - t=self.t_index, ) stack.append(img) img = numpy.dstack(stack) @@ -355,61 +330,10 @@ def provide_image(self, image_set): def __set_image_volume(self): pathname = url2pathname(self.get_url()) - print("Using 
pathname: {} for url {}".format(pathname, self.get_url())) + # Volume loading is currently limited to tiffs/numpy files only if is_numpy_file(self.__filename): data = numpy.load(pathname) - elif is_omero3d_path(self.__url): - scheme = 'omero-3d:' - url = self.__url.split(scheme)[1] - parsed_url = urllib.parse.urlparse(url) - query_params = urllib.parse.parse_qs(parsed_url.query) - zmin = int(query_params['zmin'][0]) - zmax = int(query_params['zmax'][0]) - width = int(query_params['width'][0]) - height = int(query_params['height'][0]) - image_id = query_params['imageid'][0] - channel = query_params['c'][0] - stack = numpy.ndarray((zmax - zmin + 1, height, width)) - for i in range(zmin, zmax + 1): - path = urljoin('/tile', image_id, str(i), channel, '0') - url = urllib.parse.urlunparse(( - parsed_url.scheme, - parsed_url.netloc, - path, - '', - parsed_url.query, - '' - )) - print("Requesting URL: {}".format(url)) - timeout = 2 - response = None - while timeout < 500: - try: - response = requests.get(url, timeout=timeout) - except Exception: - print('Get %s with timeout %s sec failed' % ( - url, timeout)) - timeout = timeout**2 - else: - break - if response is None: - raise Exception('Failed to retrieve data from URL') - image_bytes = BytesIO(response.content) - image = PilImage.open(image_bytes) - stack[i - zmin, :, :] = image - data = stack - elif pathname.endswith('.zarr'): - rdr = get_zarr_reader(self.get_name(), url=self.get_url()) - data = rdr.read( - c=self.channel, - series=self.series, - index=None, - rescale=False, - wants_max_intensity=False, - z=None, - t=self.t_index, - ) else: data = imageio.volread(pathname) diff --git a/cellprofiler_core/image/abstract_image/file/url/_color_image.py b/cellprofiler_core/image/abstract_image/file/url/_color_image.py index 0a520b5d..45a903f2 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_color_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_color_image.py @@ -7,7 +7,7 @@ class 
ColorImage(URLImage): """Provide a color image, tripling a monochrome plane if needed""" def __init__( - self, name, url, series, index, rescale=True, volume=False, spacing=None, z=None, t=None, + self, name, url, series, index, rescale=True, volume=False, spacing=None ): URLImage.__init__( self, @@ -18,8 +18,6 @@ def __init__( index=index, volume=volume, spacing=spacing, - z=z, - t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_mask_image.py b/cellprofiler_core/image/abstract_image/file/url/_mask_image.py index e4f7a37a..1ac74bf8 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_mask_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_mask_image.py @@ -4,7 +4,7 @@ class MaskImage(MonochromeImage): """Provide a boolean image, converting nonzero to True, zero to False if needed""" - def __init__(self, name, url, series, index, channel, volume=False, spacing=None, z=None, t=None): + def __init__(self, name, url, series, index, channel, volume=False, spacing=None): MonochromeImage.__init__( self, name, @@ -15,8 +15,6 @@ def __init__(self, name, url, series, index, channel, volume=False, spacing=None channel=channel, volume=volume, spacing=spacing, - z=z, - t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py b/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py index 951d12d2..188020c3 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py @@ -16,8 +16,6 @@ def __init__( rescale=True, volume=False, spacing=None, - z=None, - t=None, ): URLImage.__init__( self, @@ -29,8 +27,6 @@ def __init__( channel=channel, volume=volume, spacing=spacing, - z=z, - t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_url_image.py 
b/cellprofiler_core/image/abstract_image/file/url/_url_image.py index fe21b724..0486caec 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_url_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_url_image.py @@ -1,8 +1,6 @@ import os from .....utilities.pathname import url2pathname -from .....utilities.image import is_omero3d_path - from .._file_image import FileImage @@ -19,8 +17,6 @@ def __init__( channel=None, volume=False, spacing=None, - z=None, - t=None, ): if url.lower().startswith("file:"): path = url2pathname(url) @@ -29,17 +25,11 @@ def __init__( pathname = "" filename = url super(URLImage, self).__init__( - name, pathname, filename, rescale, series, index, channel, volume, spacing, z=z, t=t, + name, pathname, filename, rescale, series, index, channel, volume, spacing ) self.url = url def get_url(self): - if is_omero3d_path(self.url): - print("OMERO-3D URL: {}".format(self.url)) - url = self.url.split("omero-3d:")[1] - if url is not None: - return url - return self.url if self.cache_file(): return super(URLImage, self).get_url() return self.url diff --git a/cellprofiler_core/modules/loaddata.py b/cellprofiler_core/modules/loaddata.py index cef6c4cd..75ee9767 100644 --- a/cellprofiler_core/modules/loaddata.py +++ b/cellprofiler_core/modules/loaddata.py @@ -1060,33 +1060,6 @@ def fetch_provider(self, name, measurements, is_image_name=True): frame = measurements["Image", frame_feature] else: frame = None - if url.endswith('.zarr'): - # Zarrs need czt indexing rather than just index. 
- c, z, t = None, None, None - - if measurements.has_feature("Image", f"Channel_{name}"): - c = measurements["Image", f"Channel_{name}"] - elif measurements.has_feature("Image", "Metadata_C"): - c = measurements["Image", "Metadata_C"] - if measurements.has_feature("Image", f"Z_{name}"): - z = measurements["Image", f"Z_{name}"] - elif measurements.has_feature("Image", "Metadata_Z"): - z = measurements["Image", "Metadata_Z"] - if measurements.has_feature("Image", f"T_{name}"): - t = measurements["Image", f"T_{name}"] - elif measurements.has_feature("Image", "Metadata_T"): - t = measurements["Image", "Metadata_T"] - return FileImage( - name, - path, - filename, - rescale=self.rescale.value and is_image_name, - series=series, - index=frame, - z=z, - channel=c, - t=t, - ) return FileImage( name, path, diff --git a/cellprofiler_core/modules/metadata.py b/cellprofiler_core/modules/metadata.py index fd1a0aa8..5f9019b3 100644 --- a/cellprofiler_core/modules/metadata.py +++ b/cellprofiler_core/modules/metadata.py @@ -1133,7 +1133,6 @@ def msg(url): from bioformats.formatreader import get_omexml_metadata from typing import Optional, Any, Callable from dataclasses import dataclass - from cellprofiler_core.utilities.zarr import get_zarr_metadata @dataclass class ui_context: @@ -1174,7 +1173,6 @@ def update_all_urls(): for i, url in enumerate(urls): try: if not pbar_context.update_callback(pbar_context.dlg, i, url): - break if group.filter_choice == F_FILTERED_IMAGES: match = group.filter.evaluate( @@ -1188,10 +1186,7 @@ def update_all_urls(): continue metadata = filelist.get_metadata(url) if metadata is None: - if url.lower().endswith('.zarr'): - metadata = get_zarr_metadata(url=url) - else: - metadata = get_omexml_metadata(url=url) + metadata = get_omexml_metadata(url=url) filelist.add_metadata(url, metadata) except Exception as e: import logging @@ -1204,6 +1199,7 @@ def update_all_urls(): pbar_context.err_callback(errmsg) update_all_urls() + 
group.metadata_autoextracted.value = True def on_activated(self, workspace): diff --git a/cellprofiler_core/modules/namesandtypes.py b/cellprofiler_core/modules/namesandtypes.py index f5faacf0..b922c7dc 100644 --- a/cellprofiler_core/modules/namesandtypes.py +++ b/cellprofiler_core/modules/namesandtypes.py @@ -1981,20 +1981,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): javabridge.call(stack, "get", "([I)Ljava/lang/Object;", coords) ) ) - # CZT needed for zarr reading. Provision of index parameter will make bioformats readers ignore these. - if workspace.measurements.has_feature("Image", "Metadata_Z"): - z = workspace.measurements.get_measurement("Image", "Metadata_Z") - if z is not None: - z = int(z) - else: - z = None - if workspace.measurements.has_feature("Image", "Metadata_T"): - t = workspace.measurements.get_measurement("Image", "Metadata_T") - if t is not None: - t = int(t) - else: - t = None if len(ipds) == 1: interleaved = javabridge.get_static_field( "org/cellprofiler/imageset/ImagePlane", "INTERLEAVED", "I" @@ -2007,14 +1994,6 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): series = ipd.series index = ipd.index channel = ipd.channel - if url.lower().endswith('.zarr') and workspace.measurements.has_feature("Image", "Metadata_C"): - # Override channel index with real value if using zarrs - channelM = workspace.measurements.get_measurement("Image", "Metadata_C") - if channelM is not None: - channel = int(channelM) - else: - # Todo: Solve channel ID if mixed channel pipeline - channel = index if channel == monochrome: channel = None elif channel == interleaved: @@ -2022,7 +2001,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): if index == 0: index = None self.add_simple_image( - workspace, name, load_choice, rescale, url, series, index, channel, z=z, t=t, + workspace, name, load_choice, rescale, url, series, index, channel ) elif all([ipd.url == ipds[0].url for 
ipd in ipds[1:]]): # Can load a simple image with a vector of series/index/channel @@ -2031,7 +2010,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): index = [ipd.index for ipd in ipds] channel = [None if ipd.channel < 0 else ipd.channel for ipd in ipds] self.add_simple_image( - workspace, name, load_choice, rescale, url, series, index, channel, z=z, t=t, + workspace, name, load_choice, rescale, url, series, index, channel ) else: # Different URLs - someone is a clever sadist @@ -2044,7 +2023,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): ) def add_simple_image( - self, workspace, name, load_choice, rescale, url, series, index, channel, z=None, t=None, + self, workspace, name, load_choice, rescale, url, series, index, channel ): m = workspace.measurements @@ -2056,7 +2035,7 @@ def add_simple_image( if load_choice == LOAD_AS_COLOR_IMAGE: provider = ColorImage( - name, url, series, index, rescale, volume=volume, spacing=spacing, z=z, t=t, + name, url, series, index, rescale, volume=volume, spacing=spacing ) elif load_choice == LOAD_AS_GRAYSCALE_IMAGE: provider = MonochromeImage( @@ -2068,16 +2047,14 @@ def add_simple_image( rescale, volume=volume, spacing=spacing, - z=z, - t=t, ) elif load_choice == LOAD_AS_ILLUMINATION_FUNCTION: provider = MonochromeImage( - name, url, series, index, channel, False, volume=volume, spacing=spacing, z=z, t=t, + name, url, series, index, channel, False, volume=volume, spacing=spacing ) elif load_choice == LOAD_AS_MASK: provider = MaskImage( - name, url, series, index, channel, volume=volume, spacing=spacing, z=z, t=t, + name, url, series, index, channel, volume=volume, spacing=spacing ) workspace.image_set.providers.append(provider) diff --git a/cellprofiler_core/preferences/__init__.py b/cellprofiler_core/preferences/__init__.py index 679c5a62..9b3c8001 100644 --- a/cellprofiler_core/preferences/__init__.py +++ b/cellprofiler_core/preferences/__init__.py @@ -82,12 +82,7 
@@ def get_config(): try: config = wx.Config.Get(False) except wx.PyNoAppError: - try: - app = wx.App(0) - except SystemExit: - # We're probably building on GitHub Actions - print("Python version doesn't support GUI, no app available.") - return __headless_config + app = wx.App(0) config = wx.Config.Get(False) if not config: wx.Config.Set( diff --git a/cellprofiler_core/utilities/image.py b/cellprofiler_core/utilities/image.py index f2984a4b..51fa71d1 100644 --- a/cellprofiler_core/utilities/image.py +++ b/cellprofiler_core/utilities/image.py @@ -115,9 +115,6 @@ def is_numpy_file(filename): def is_matlab_file(filename): return os.path.splitext(filename)[-1].lower() == ".mat" -def is_omero3d_path(url): - return url.lower().startswith('omero-3d:') - def loadmat(path): imgdata = scipy.io.matlab.mio.loadmat(path, struct_as_record=True) diff --git a/cellprofiler_core/utilities/zarr.py b/cellprofiler_core/utilities/zarr.py deleted file mode 100644 index bbf07c60..00000000 --- a/cellprofiler_core/utilities/zarr.py +++ /dev/null @@ -1,279 +0,0 @@ -import collections -import os -import re -import sys -import tempfile -import urllib.parse -from urllib.request import urlopen - -import numpy -import zarr -import boto3 -import shutil - -from cellprofiler_core.utilities.pathname import url2pathname - -import logging -logger = logging.getLogger(__name__) - - -def get_zarr_metadata(url): - xmlfile = 'METADATA.ome.xml' - parser = urllib.parse.urlparse(url) - if parser.scheme == 'file': - url = url2pathname(url) - elif parser.scheme == 's3': - client = boto3.client('s3') - bucket_name, key = re.compile('s3://([\w\d\-\.]+)/(.*)').search( - url).groups() - key += "/OME/METADATA.ome.xml" - url = client.generate_presigned_url( - 'get_object', - Params={'Bucket': bucket_name, 'Key': key.replace("+", " ")} - ) - src = urlopen(url) - return src.read().decode() - metadata_path = os.path.join(url, "OME", xmlfile) - if os.path.exists(metadata_path): - with open(metadata_path) as data: - 
return data.read() - elif os.path.exists(os.path.join(url, xmlfile)): - with open(os.path.join(url, xmlfile)) as data: - return data.read() - else: - logger.warning("Input zarr lacks an OME-XML file. " - "CellProfiler will try to construct metadata, but this feature is experimental") - return make_ome_xml(url) - - -def make_ome_xml(url): - # Prototype zarr parser to construct a fake OME XML. - root = zarr.open(url, mode='r') - queue = collections.deque() - queue.append(root) - xmlstr = """""" - while queue: - subject = queue.popleft() - for loc, group in subject.groups(): - queue.append(group) - for loc, array in subject.arrays(): - t, c, z, y, x = array.shape - dtype = array.dtype.name - if dtype == "int64": - # OME-XML can't handle int64, consider as a float instead. - dtype = "float" - xmlstr += \ - f"""UnknownCellProfiler OME Metadata """ - for i in range(c): - xmlstr += f"""""" - xmlstr += """ """ - # We only want the first resolution - break - return xmlstr + "" - - -def get_zarr_reader(key, path=None, url=None): - logger.debug("Getting image reader for: %s, %s, %s" % (key, path, url)) - from bioformats.formatreader import __image_reader_key_cache, __image_reader_cache, release_image_reader - if key in __image_reader_key_cache: - old_path, old_url = __image_reader_key_cache[key] - old_count, rdr = __image_reader_cache[old_path, old_url] - if old_path == path and old_url == url: - return rdr - release_image_reader(key) - if (path, url) in __image_reader_cache: - old_count, rdr = __image_reader_cache[path, url] - else: - rdr = ZarrReader(path, url) - old_count = 0 - __image_reader_cache[path, url] = (old_count + 1, rdr) - __image_reader_key_cache[key] = (path, url) - - return rdr - - -class ZarrReader(object): - def __init__(self, path=None, url=None, perform_init=True): - - self.stream = None - file_scheme = "file:" - - self.using_temp_file = False - - if url is not None: - url = str(url) - if url.lower().startswith(file_scheme): - url = url2pathname(url) - 
path = url - elif path is None: - path = url - - self.path = path - if path is None: - if not url.lower().startswith('s3:'): - self.path = self.download(url) - else: - if sys.platform.startswith("win"): - self.path = self.path.replace("/", os.path.sep) - filename = os.path.split(path)[1] - store = zarr.storage.FSStore(self.path) - if path.startswith('s3'): - logger.info("Zarr is stored on S3, will try to read directly.") - if '.zmetadata' in store: - # Zarr has consolidated metadata. - self.reader = zarr.convenience.open_consolidated(store, mode='r') - else: - logging.warning(f"Image is on S3 but lacks consolidated metadata. " - f"This may degrade reading performance. URL: {path}") - self.reader = zarr.open(store, mode='r') - elif not os.path.isdir(self.path): - raise IOError("The file, \"%s\", does not exist." % path) - else: - self.reader = zarr.open(store, mode='r') - self.well_map = self.map_wells() - self.series_list = self.map_series() - - def read(self, c=None, z=None, t=None, series=None, index=None, rescale=True, wants_max_intensity=True, channel_names=None, XYWH=None): - """Read a single plane from the image reader file. - :param c: read from this channel. `None` = read color image if multichannel - or interleaved RGB. - :param z: z-stack index - :param t: time index - :param series: series for ``.flex`` and similar multi-stack formats - :param index: if `None`, fall back to ``zct``, otherwise load the indexed frame - :param rescale: `True` to rescale the intensity scale to 0 and 1; `False` to - return the raw values native to the file. - :param wants_max_intensity: if `False`, only return the image; if `True`, - return a tuple of image and max intensity - :param channel_names: provide the channel names for the OME metadata - :param XYWH: a (x, y, w, h) tuple""" - # Index should always be None, we need ctz to properly index zarrs. 
- logger.debug(f"Reading {c=}, {z=}, {t=}, {series=}, {index=}, {XYWH=}") - c2 = None if c is None else c + 1 - z2 = None if z is None else z + 1 - t2 = None if t is None else t + 1 - if XYWH is not None: - x, y, w, h = XYWH - x = round(x) - y = round(y) - x2 = x + w - y2 = y + h - else: - y, y2, x, x2 = None, None, None, None - if self.well_map: - series_col, series_row, series_field = self.series_list[series] - base_path = self.well_map[(series_col, series_row)] - seriesreader = self.reader[base_path] - field = seriesreader.attrs['well']['images'][series_field]['path'] - # Hard-coding resolution 0 for now - seriesreader = seriesreader[field][0] - else: - seriesreader = self.reader[self.series_list[series]][0] - # Zarr arrays are indexed as TCZYX - if len(seriesreader.shape) == 5: - image = seriesreader[t:t2, c:c2, z:z2, y:y2, x:x2] - else: - image = seriesreader[c:c2, z:z2, y:y2, x:x2] - # Remove redundant axes - image = numpy.squeeze(image) - # C needs to be the last axis, but z should be first. Thank you CellProfiler. - if len(image.shape) > 2 and z is not None: - image = numpy.moveaxis(image, 0, -1) - elif len(image.shape) > 3: - image = numpy.moveaxis(image, 0, -1) - scale = numpy.iinfo(image.dtype).max - if rescale: - image = image.astype(float) / scale - if wants_max_intensity: - if image.dtype in [numpy.int8, numpy.uint8]: - scale = 255 - elif image.dtype in [numpy.int16, numpy.uint16]: - scale = 65535 - elif image.dtype == numpy.int32: - scale = 2 ** 32 - 1 - elif image.dtype == numpy.uint32: - scale = 2 ** 32 - else: - scale = 1 - return image, scale - return image - - def map_wells(self): - # For HCS zarrs, we construct a dictionary mapping well positions to array directories. 
- attrs = self.reader.attrs - if 'plate' not in attrs or 'wells' not in attrs['plate']: - return False - well_data = attrs['plate']['wells'] - mapper = {} - if 'column_index' in well_data[0]: - # Standard format - for row in well_data: - mapper[(str(row['column_index']), str(row['row_index']))] = row['path'] - else: - for row in well_data: - path = row['path'] - col, row = path.split('/', 1) - mapper[(str(col), str(row))] = path - return mapper - - def map_series(self): - # If in HCS mode we produce a list of (Row, Column, FieldNum) tuples to use with the well map. - # If in non-HCS mode we just make a list of paths to each series. - series_list = [] - if self.well_map: - metadata = get_zarr_metadata(self.path) - from lxml import etree - import io - context = etree.iterparse(io.BytesIO(metadata.encode()), tag="{*}ImageRef") - for action, node in context: - wellsample = node.getparent() - well = wellsample.getparent() - series_list.append((well.get('Column'), well.get('Row'), well.getchildren().index(wellsample))) - node.clear() - if not series_list: - # No series were found, try constructing from the image tags. - context = etree.iterparse(io.BytesIO(metadata.encode()), tag="{*}Image") - for action, node in context: - imagepath = node.attrib["Name"] - parts = imagepath.split('/') - series_list.append((parts[1], parts[2], int(parts[3]))) - node.clear() - else: - # No well metadata, just fetch series in order. - queue = collections.deque() - queue.append(self.reader) - while queue: - subject = queue.popleft() - for loc, group in subject.groups(): - queue.append(group) - for loc, array in subject.arrays(): - series_list.append(array.name) - # We only want the first resolution - break - return series_list - - def download(self, url): - # Cloned from bioformats' reader. Should temporarily download URLs. - # No idea if this will work since zarr is a directory-based format. 
- scheme = urllib.parse.urlparse(url)[0] - ext = url[url.rfind("."):] - urlpath = urllib.parse.urlparse(url)[2] - filename = os.path.basename(self.path) - - self.using_temp_file = True - - src = urlopen(url) - dest_fd, self.path = tempfile.mkstemp(suffix=ext) - try: - with os.fdopen(dest_fd, 'wb') as dest: - shutil.copyfileobj(src, dest) - except: - os.remove(self.path) - finally: - src.close() - - return filename - - def close(self): - # Zarr readers don't need to be explicitly closed. - pass diff --git a/setup.py b/setup.py index d9ecfc90..42b88fb5 100644 --- a/setup.py +++ b/setup.py @@ -21,28 +21,25 @@ "wx": ["wxPython==4.1.0"], }, install_requires=[ - "boto3==1.14.23", + "boto3>=1.12.28", "centrosome==1.2.2", "docutils==0.15.2", - "fsspec==2022.2.0", - "h5py==3.6.0", - "matplotlib==3.1.3", - "numpy==1.23.1", + "h5py~=3.6.0", + "matplotlib>=3.1.3", + "numpy>=1.18.2", "prokaryote==2.4.4", - "psutil==5.7.0", + "psutil>=5.7.0", "python-bioformats==4.0.7", "python-javabridge==4.0.3", - "pyzmq==22.3.0", - "Pillow==8.1.0", + "pyzmq~=22.3", "scikit-image==0.18.3", - "scipy==1.9.0", - "zarr==2.10.2", + "scipy>=1.4.1", ], license="BSD", name="cellprofiler-core", package_data={"cellprofiler_core": ["py.typed"]}, packages=setuptools.find_packages(exclude=["tests"]), - python_requires=">=3.8, <4", + python_requires=">=3.8", url="https://github.com/CellProfiler/core", version="4.2.6", zip_safe=False, From 26054adf7e9c754861cf0d81a9141b3e6b76566b Mon Sep 17 00:00:00 2001 From: Beth Cimini Date: Tue, 23 Jul 2024 17:11:22 -0400 Subject: [PATCH 02/10] Bump centrosome --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 42b88fb5..dde0fab0 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ }, install_requires=[ "boto3>=1.12.28", - "centrosome==1.2.2", + "centrosome>=1.2.3,<1.3", "docutils==0.15.2", "h5py~=3.6.0", "matplotlib>=3.1.3", From 1f8f8922a15aecf30682fb102e2d813d0b6d9d53 Mon Sep 17 00:00:00 2001 From: Beth Cimini 
Date: Tue, 23 Jul 2024 17:28:07 -0400 Subject: [PATCH 03/10] Add more characters to the troublesome character dict --- cellprofiler_core/module/_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellprofiler_core/module/_module.py b/cellprofiler_core/module/_module.py index a9316c95..8fdbb6bd 100644 --- a/cellprofiler_core/module/_module.py +++ b/cellprofiler_core/module/_module.py @@ -545,7 +545,7 @@ def set_notes(self, notes): """Give the module new user-entered notes """ - sanitization_dict = {"“":"\"","”":"\""} + sanitization_dict = {"“":"\"","”":"\"","—":"-","’":"'","`":"'"} self.__notes = [''.join(sanitization_dict.get(x,x) for x in note) for note in notes] notes = property(get_notes, set_notes) From e96f9a872b3f00c764d440844b4aa7f08a8534df Mon Sep 17 00:00:00 2001 From: bethac07 Date: Thu, 25 Jul 2024 10:54:11 -0400 Subject: [PATCH 04/10] bump 426 to 427 --- cellprofiler_core/__init__.py | 2 +- docs/conf.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cellprofiler_core/__init__.py b/cellprofiler_core/__init__.py index 1eecfc49..6e96fc64 100644 --- a/cellprofiler_core/__init__.py +++ b/cellprofiler_core/__init__.py @@ -1 +1 @@ -__version__ = "4.2.6" +__version__ = "4.2.7" diff --git a/docs/conf.py b/docs/conf.py index 43001d65..3f7911ab 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,6 +12,6 @@ project = "CellProfiler-core" -release = "4.2.6" +release = "4.2.7" templates_path = ["_templates"] diff --git a/setup.cfg b/setup.cfg index 7a10dda6..90ece8ce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,6 +3,6 @@ filterwarnings = ignore::DeprecationWarning ignore::FutureWarning minversion = - 4.2.6 + 4.2.7 testpaths = ./tests/ diff --git a/setup.py b/setup.py index dde0fab0..e3cef28b 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,6 @@ packages=setuptools.find_packages(exclude=["tests"]), python_requires=">=3.8", url="https://github.com/CellProfiler/core", - version="4.2.6", + 
version="4.2.7", zip_safe=False, ) From 29e7bbc69396e099ff13f4e357a09e14a0ff6c77 Mon Sep 17 00:00:00 2001 From: emiglietta Date: Fri, 23 Aug 2024 14:30:32 -0400 Subject: [PATCH 05/10] Fix worker halting - add retry for workers failing to send MeasurementsReport, so they don't hang indefintely - add safeguard around post group run --- cellprofiler_core/analysis/_runner.py | 2 +- cellprofiler_core/utilities/zmq/__init__.py | 1 + cellprofiler_core/utilities/zmq/_event.py | 4 ++ cellprofiler_core/worker/_worker.py | 51 +++++++++++++++------ 4 files changed, 42 insertions(+), 16 deletions(-) create mode 100644 cellprofiler_core/utilities/zmq/_event.py diff --git a/cellprofiler_core/analysis/_runner.py b/cellprofiler_core/analysis/_runner.py index efbee900..d099dabd 100644 --- a/cellprofiler_core/analysis/_runner.py +++ b/cellprofiler_core/analysis/_runner.py @@ -102,7 +102,7 @@ def __init__( # should have jobserver() call load_measurements_from_buffer() rather # than interface() doing so. Currently, passing measurements in this # way seems like it might be buggy: - # http://code.google.com/p/h5py/issues/detail?id=244 + # https://github.com/h5py/h5py/issues/244 self.received_measurements_queue = queue.Queue(maxsize=10) self.shared_dicts = None diff --git a/cellprofiler_core/utilities/zmq/__init__.py b/cellprofiler_core/utilities/zmq/__init__.py index 7bd69099..01faab0e 100644 --- a/cellprofiler_core/utilities/zmq/__init__.py +++ b/cellprofiler_core/utilities/zmq/__init__.py @@ -17,6 +17,7 @@ LockStatusRequest, Request, ) +from ._event import PollTimeoutException NOTIFY_SOCKET_ADDR = "inproc://BoundaryNotifications" SD_KEY_DICT = "__keydict__" diff --git a/cellprofiler_core/utilities/zmq/_event.py b/cellprofiler_core/utilities/zmq/_event.py new file mode 100644 index 00000000..ca125c73 --- /dev/null +++ b/cellprofiler_core/utilities/zmq/_event.py @@ -0,0 +1,4 @@ +class PollTimeoutException(Exception): + """Exception issued by a timeout from polling""" + + pass diff --git 
a/cellprofiler_core/worker/_worker.py b/cellprofiler_core/worker/_worker.py index 8c100020..e17f7dff 100644 --- a/cellprofiler_core/worker/_worker.py +++ b/cellprofiler_core/worker/_worker.py @@ -29,12 +29,15 @@ from ..constants.worker import the_zmq_context from ..measurement import Measurements from ..utilities.measurement import load_measurements_from_buffer +from ..utilities.zmq import PollTimeoutException from ..pipeline import CancelledException from ..preferences import get_awt_headless from ..preferences import set_preferences_from_dict from ..utilities.zmq.communicable.reply.upstream_exit import UpstreamExit from ..workspace import Workspace +LOGGER = logging.getLogger(__name__) + class Worker: """An analysis worker processing work at a given address @@ -124,6 +127,7 @@ def run(self): ) t0 = time.time() self.work_socket = the_zmq_context.socket(zmq.REQ) + self.work_socket.set_hwm(2000) self.work_socket.connect(self.work_request_address) # fetch a job the_request = Work(self.current_analysis_id) @@ -304,18 +308,21 @@ def do_job(self, job): return if worker_runs_post_group: - last_workspace.interaction_handler = self.interaction_handler - last_workspace.cancel_handler = self.cancel_handler - last_workspace.post_group_display_handler = ( - self.post_group_display_handler - ) - # There might be an exception in this call, but it will be - # handled elsewhere, and there's nothing we can do for it - # here. - current_pipeline.post_group( - last_workspace, current_measurements.get_grouping_keys() - ) - del last_workspace + if not last_workspace is None: + last_workspace.interaction_handler = self.interaction_handler + last_workspace.cancel_handler = self.cancel_handler + last_workspace.post_group_display_handler = ( + self.post_group_display_handler + ) + # There might be an exception in this call, but it will be + # handled elsewhere, and there's nothing we can do for it + # here. 
+ current_pipeline.post_group( + last_workspace, current_measurements.get_grouping_keys() + ) + del last_workspace + else: + LOGGER.error("No workspace from last image set, cannot run post group") # send measurements back to server req = MeasurementsReport( @@ -323,7 +330,18 @@ def do_job(self, job): buf=current_measurements.file_contents(), image_set_numbers=image_set_numbers, ) - rep = self.send(req) + + while True: + try: + rep = self.send(req, timeout=4000) + break + except PollTimeoutException: + LOGGER.info(f"Worker sending MeasurementsReport halted, retrying for job {str(job.image_set_numbers)}") + self.work_socket.close(linger=0) + self.work_socket = the_zmq_context.socket(zmq.REQ) + self.work_socket.set_hwm(2000) + self.work_socket.connect(self.work_request_address) + continue except CancelledException: # Main thread received shutdown signal @@ -389,7 +407,7 @@ def omero_login_handler(self): rep = self.send(req) use_omero_credentials(rep.credentials) - def send(self, req, work_socket=None): + def send(self, req, work_socket=None, timeout=None): """Send a request and receive a reply req - request to send @@ -410,7 +428,10 @@ def send(self, req, work_socket=None): req.send_only(work_socket) response = None while response is None: - for socket, state in poller.poll(): + poll_res = poller.poll(timeout) + if len(poll_res) == 0: + raise PollTimeoutException + for socket, state in poll_res: if socket == self.notify_socket and state == zmq.POLLIN: notify_msg = self.notify_socket.recv() if notify_msg == NOTIFY_STOP: From 773d15cf589622cff78b4226c9d372e397b57654 Mon Sep 17 00:00:00 2001 From: Nodar Gogoberidze Date: Mon, 26 Aug 2024 20:24:00 +0000 Subject: [PATCH 06/10] Bump to more available macos version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e9e644a5..de908da6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,7 +5,7
@@ jobs: strategy: max-parallel: 4 matrix: - os: [macos-11] + os: [macos-13] python-version: [ 3.8, 3.9 ] fail-fast: false runs-on: ${{ matrix.os }} From 78a445578ea103cf342086939e4cc2df50349b5d Mon Sep 17 00:00:00 2001 From: Nodar Gogoberidze Date: Mon, 26 Aug 2024 20:49:19 +0000 Subject: [PATCH 07/10] Pin build tools --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index de908da6..9e57f733 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,7 +43,8 @@ jobs: env: LC_ALL: "en_US.UTF-8" run: | - brew install mysql + brew install mysql@8.0 + brew link mysql@8.0 --force mysql.server start mysql --host=$CP_MYSQL_TEST_HOST --user=$CP_MYSQL_TEST_USER --execute="CREATE DATABASE $CP_MYSQL_TEST_DB;" --skip-password - name: Installation @@ -51,7 +52,7 @@ jobs: | pip install pyinstaller pip install --upgrade pip setuptools wheel - pip install numpy>=1.20.1 + pip install "numpy>=1.20.1,<2" "Cython<3" # git clone https://github.com/CellProfiler/CellProfiler.git ~/cellprofiler pip install -e .[test] # pip install -e ~/cellprofiler From c4c83066f95793cac0de2e603570226a507ffada Mon Sep 17 00:00:00 2001 From: Nodar Gogoberidze Date: Fri, 13 Sep 2024 15:00:54 -0400 Subject: [PATCH 08/10] Bump to 4.2.8 --- cellprofiler_core/__init__.py | 2 +- cellprofiler_core/__main__.py | 55 ------------------- cellprofiler_core/commands/__init__.py | 0 .../commands/_pipeline_command.py | 40 -------------- cellprofiler_core/commands/_worker_command.py | 15 ----- docs/conf.py | 2 +- setup.cfg | 2 +- setup.py | 12 ++-- 8 files changed, 8 insertions(+), 120 deletions(-) delete mode 100644 cellprofiler_core/commands/__init__.py delete mode 100644 cellprofiler_core/commands/_pipeline_command.py delete mode 100644 cellprofiler_core/commands/_worker_command.py diff --git a/cellprofiler_core/__init__.py b/cellprofiler_core/__init__.py index 6e96fc64..213df64a 100644 ---
a/cellprofiler_core/__init__.py +++ b/cellprofiler_core/__init__.py @@ -1 +1 @@ -__version__ = "4.2.7" +__version__ = "4.2.8" diff --git a/cellprofiler_core/__main__.py b/cellprofiler_core/__main__.py index e2d6fe32..e69de29b 100644 --- a/cellprofiler_core/__main__.py +++ b/cellprofiler_core/__main__.py @@ -1,55 +0,0 @@ -import os -import pathlib - -import click - -import cellprofiler_core.commands - -CONTEXT_SETTINGS = dict(auto_envvar_prefix="COMPLEX") - - -class Command(click.MultiCommand): - def get_command(self, context, name): - try: - name = f"cellprofiler_core.commands._{name}_command" - - imported_module = __import__(name, None, None, ["command"]) - except ImportError: - return - - return imported_module.command - - def list_commands(self, context): - command_names = [] - - commands_pathname = cellprofiler_core.commands.__file__ - - commands_directory = pathlib.Path(commands_pathname).parent - - for filename in os.listdir(commands_directory): - if filename.endswith("_command.py") and filename.startswith("_"): - command_name = filename[1:-11] - - command_names += [command_name] - - command_names.sort() - - return command_names - - -class Environment: - def __init__(self): - pass - - -pass_environment = click.make_pass_decorator(Environment, ensure=True) - - -@click.command(cls=Command, context_settings=CONTEXT_SETTINGS) -@pass_environment -def main(context): - pass - - -if __name__ == "__main__": - main({}) diff --git a/cellprofiler_core/commands/__init__.py b/cellprofiler_core/commands/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/cellprofiler_core/commands/_pipeline_command.py b/cellprofiler_core/commands/_pipeline_command.py deleted file mode 100644 index 4a5d3d78..00000000 --- a/cellprofiler_core/commands/_pipeline_command.py +++ /dev/null @@ -1,40 +0,0 @@ -import click - -from ..__main__ import pass_environment - - -@click.group("pipeline") -@pass_environment -def command(context): - pass - - 
-@command.command("measurements", help="returns measurements extracted by the pipeline") -@pass_environment -def measurements(context): - pass - - -@command.command("run", help="executes the pipeline") -@click.argument("pipeline", type=click.File("r")) -@click.option("--batch-size", type=int) -@click.option("--data", type=click.Path()) -@click.option("--default-images-directory", type=click.Path()) -@click.option("--default-output-directory", type=click.Path()) -@click.option("--images", multiple=True, type=click.Path()) -@click.option("--beginning", default=1, type=int) -@click.option("--end", type=int) -@click.option("--group", type=str) -@pass_environment -def run( - context, - batch_size, - data, - images_directory, - output_directory, - images, - beginning, - end, - grouping, -): - pass diff --git a/cellprofiler_core/commands/_worker_command.py b/cellprofiler_core/commands/_worker_command.py deleted file mode 100644 index b2298f77..00000000 --- a/cellprofiler_core/commands/_worker_command.py +++ /dev/null @@ -1,15 +0,0 @@ -import click - -from ..__main__ import pass_environment - - -@click.group("worker") -@pass_environment -def command(context): - pass - - -@command.command("start") -@pass_environment -def start(context): - pass diff --git a/docs/conf.py b/docs/conf.py index 3f7911ab..aff4ad8f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,6 +12,6 @@ project = "CellProfiler-core" -release = "4.2.7" +release = "4.2.8" templates_path = ["_templates"] diff --git a/setup.cfg b/setup.cfg index 90ece8ce..4441e861 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,6 +3,6 @@ filterwarnings = ignore::DeprecationWarning ignore::FutureWarning minversion = - 4.2.7 + 4.2.8 testpaths = ./tests/ diff --git a/setup.py b/setup.py index e3cef28b..a3461f70 100644 --- a/setup.py +++ b/setup.py @@ -8,16 +8,14 @@ "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], 
extras_require={ "dev": [ - "black==19.10b0", - "click>=7.1.2", - "pre-commit==2.2.0", "sphinx==3.1.2", "twine==3.1.1", ], - "test": ["pytest==5.4.1"], + "test": ["pytest~=7.4.1"], "wx": ["wxPython==4.1.0"], }, install_requires=[ @@ -29,8 +27,8 @@ "numpy>=1.18.2", "prokaryote==2.4.4", "psutil>=5.7.0", - "python-bioformats==4.0.7", - "python-javabridge==4.0.3", + "python-bioformats>=4.0.7,<5", + "python-javabridge>=4.0.3,<5", "pyzmq~=22.3", "scikit-image==0.18.3", "scipy>=1.4.1", @@ -41,6 +39,6 @@ packages=setuptools.find_packages(exclude=["tests"]), python_requires=">=3.8", url="https://github.com/CellProfiler/core", - version="4.2.7", + version="4.2.8", zip_safe=False, ) From f4211c18f8611a112b3339c457db20981dbd0ec1 Mon Sep 17 00:00:00 2001 From: Nodar Gogoberidze Date: Tue, 17 Sep 2024 16:40:29 -0400 Subject: [PATCH 09/10] [build] Update sphinx for build --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a3461f70..8c97f2f2 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ ], extras_require={ "dev": [ - "sphinx==3.1.2", + "sphinx>=3.1.2", "twine==3.1.1", ], "test": ["pytest~=7.4.1"], From fefde860f62b22516fabb78a045b192207bb6ccb Mon Sep 17 00:00:00 2001 From: David Stirling Date: Fri, 11 Oct 2024 11:40:08 +0100 Subject: [PATCH 10/10] Reapply GS-specific modifications --- cellprofiler_core/analysis/_runner.py | 3 +- cellprofiler_core/constants/image.py | 2 +- .../image/abstract_image/file/_file_image.py | 80 ++++- .../abstract_image/file/url/_color_image.py | 4 +- .../abstract_image/file/url/_mask_image.py | 4 +- .../file/url/_monochrome_image.py | 4 + .../abstract_image/file/url/_url_image.py | 12 +- cellprofiler_core/modules/loaddata.py | 27 ++ cellprofiler_core/modules/metadata.py | 8 +- cellprofiler_core/modules/namesandtypes.py | 35 ++- cellprofiler_core/preferences/__init__.py | 7 +- cellprofiler_core/utilities/image.py | 3 + cellprofiler_core/utilities/zarr.py | 279 ++++++++++++++++++ 13 files 
changed, 452 insertions(+), 16 deletions(-) create mode 100644 cellprofiler_core/utilities/zarr.py diff --git a/cellprofiler_core/analysis/_runner.py b/cellprofiler_core/analysis/_runner.py index d099dabd..712bf80b 100644 --- a/cellprofiler_core/analysis/_runner.py +++ b/cellprofiler_core/analysis/_runner.py @@ -672,7 +672,8 @@ def start_workers(cls, num=None): # closed, the subprocess exits. if hasattr(sys, "frozen"): if sys.platform == "darwin": - executable = os.path.join(os.path.dirname(sys.executable), "cp") + executable = os.path.join(os.path.dirname(sys.executable), + "cellprofilerapp") args = [executable] + aw_args elif sys.platform.startswith("linux"): aw_path = os.path.join(os.path.dirname(__file__), "__init__.py") diff --git a/cellprofiler_core/constants/image.py b/cellprofiler_core/constants/image.py index 23eb9488..a2f42e43 100644 --- a/cellprofiler_core/constants/image.py +++ b/cellprofiler_core/constants/image.py @@ -103,4 +103,4 @@ SUB_ALL = "All" SUB_SOME = "Some" FILE_SCHEME = "file:" -PASSTHROUGH_SCHEMES = ("http", "https", "ftp", "omero", "s3") +PASSTHROUGH_SCHEMES = ("http", "https", "ftp", "omero", "s3", "omero-3d") diff --git a/cellprofiler_core/image/abstract_image/file/_file_image.py b/cellprofiler_core/image/abstract_image/file/_file_image.py index 4b1715d8..e0dee2d0 100644 --- a/cellprofiler_core/image/abstract_image/file/_file_image.py +++ b/cellprofiler_core/image/abstract_image/file/_file_image.py @@ -9,16 +9,23 @@ import numpy import skimage.io +import requests +from PIL import Image as PilImage +from io import BytesIO +from posixpath import join as urljoin + import cellprofiler_core.preferences from .._abstract_image import AbstractImage from ..._image import Image from ....utilities.image import is_numpy_file from ....utilities.image import is_matlab_file +from ....utilities.image import is_omero3d_path from ....utilities.image import loadmat from ....utilities.image import load_data_file from ....utilities.image import 
generate_presigned_url from ....constants.image import FILE_SCHEME, PASSTHROUGH_SCHEMES from ....utilities.pathname import pathname2url, url2pathname +from ....utilities.zarr import get_zarr_reader class FileImage(AbstractImage): @@ -35,6 +42,8 @@ def __init__( channel=None, volume=False, spacing=None, + z=None, + t=None, ): """ :param name: Name of image to be provided @@ -84,6 +93,8 @@ def __init__( self.__index = index self.__volume = volume self.__spacing = spacing + self.z_index = z if z is not None else 0 + self.t_index = t if t is not None else 0 self.scale = None @property @@ -176,6 +187,8 @@ def cache_file(self): ) finally: os.close(tempfd) + elif url.lower().endswith('.zarr'): + self.__cached_file = url else: from bioformats.formatreader import get_image_reader @@ -185,6 +198,8 @@ def cache_file(self): return True def get_full_name(self): + if is_omero3d_path(self.__url): + return self.get_url() self.cache_file() if self.__is_cached: return self.__cached_file @@ -203,8 +218,12 @@ def get_md5_hash(self, measurements): # # Cache the MD5 hash on the image reader # - if is_matlab_file(self.__filename) or is_numpy_file(self.__filename): + if (is_matlab_file(self.__filename) or + is_numpy_file(self.__filename) or + is_omero3d_path(self.get_url())): rdr = None + elif self.get_url().endswith('.zarr'): + rdr = get_zarr_reader(None, url=self.get_url()) else: from bioformats.formatreader import get_image_reader @@ -273,6 +292,8 @@ def __set_image(self): url = self.get_url() if url.lower().startswith("omero:"): rdr = get_image_reader(self.get_name(), url=url) + elif url.lower().endswith('.zarr'): + rdr = get_zarr_reader(self.get_name(), url=url) else: rdr = get_image_reader(self.get_name(), url=self.get_url()) if numpy.isscalar(self.index) or self.index is None: @@ -283,6 +304,8 @@ def __set_image(self): rescale=self.rescale if isinstance(self.rescale, bool) else False, wants_max_intensity=True, channel_names=channel_names, + z=self.z_index, + t=self.t_index, ) else: 
# It's a stack @@ -305,6 +328,8 @@ def __set_image(self): rescale=self.rescale if isinstance(self.rescale, bool) else False, wants_max_intensity=True, channel_names=channel_names, + z=self.z_index, + t=self.t_index, ) stack.append(img) img = numpy.dstack(stack) @@ -330,10 +355,61 @@ def provide_image(self, image_set): def __set_image_volume(self): pathname = url2pathname(self.get_url()) - + print("Using pathname: {} for url {}".format(pathname, self.get_url())) # Volume loading is currently limited to tiffs/numpy files only if is_numpy_file(self.__filename): data = numpy.load(pathname) + elif is_omero3d_path(self.__url): + scheme = 'omero-3d:' + url = self.__url.split(scheme)[1] + parsed_url = urllib.parse.urlparse(url) + query_params = urllib.parse.parse_qs(parsed_url.query) + zmin = int(query_params['zmin'][0]) + zmax = int(query_params['zmax'][0]) + width = int(query_params['width'][0]) + height = int(query_params['height'][0]) + image_id = query_params['imageid'][0] + channel = query_params['c'][0] + stack = numpy.ndarray((zmax - zmin + 1, height, width)) + for i in range(zmin, zmax + 1): + path = urljoin('/tile', image_id, str(i), channel, '0') + url = urllib.parse.urlunparse(( + parsed_url.scheme, + parsed_url.netloc, + path, + '', + parsed_url.query, + '' + )) + print("Requesting URL: {}".format(url)) + timeout = 2 + response = None + while timeout < 500: + try: + response = requests.get(url, timeout=timeout) + except Exception: + print('Get %s with timeout %s sec failed' % ( + url, timeout)) + timeout = timeout**2 + else: + break + if response is None: + raise Exception('Failed to retrieve data from URL') + image_bytes = BytesIO(response.content) + image = PilImage.open(image_bytes) + stack[i - zmin, :, :] = image + data = stack + elif pathname.endswith('.zarr'): + rdr = get_zarr_reader(self.get_name(), url=self.get_url()) + data = rdr.read( + c=self.channel, + series=self.series, + index=None, + rescale=False, + wants_max_intensity=False, + z=None, + 
t=self.t_index, + ) else: data = imageio.volread(pathname) diff --git a/cellprofiler_core/image/abstract_image/file/url/_color_image.py b/cellprofiler_core/image/abstract_image/file/url/_color_image.py index 45a903f2..0a520b5d 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_color_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_color_image.py @@ -7,7 +7,7 @@ class ColorImage(URLImage): """Provide a color image, tripling a monochrome plane if needed""" def __init__( - self, name, url, series, index, rescale=True, volume=False, spacing=None + self, name, url, series, index, rescale=True, volume=False, spacing=None, z=None, t=None, ): URLImage.__init__( self, @@ -18,6 +18,8 @@ def __init__( index=index, volume=volume, spacing=spacing, + z=z, + t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_mask_image.py b/cellprofiler_core/image/abstract_image/file/url/_mask_image.py index 1ac74bf8..e4f7a37a 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_mask_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_mask_image.py @@ -4,7 +4,7 @@ class MaskImage(MonochromeImage): """Provide a boolean image, converting nonzero to True, zero to False if needed""" - def __init__(self, name, url, series, index, channel, volume=False, spacing=None): + def __init__(self, name, url, series, index, channel, volume=False, spacing=None, z=None, t=None): MonochromeImage.__init__( self, name, @@ -15,6 +15,8 @@ def __init__(self, name, url, series, index, channel, volume=False, spacing=None channel=channel, volume=volume, spacing=spacing, + z=z, + t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py b/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py index 188020c3..951d12d2 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py +++ 
b/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py @@ -16,6 +16,8 @@ def __init__( rescale=True, volume=False, spacing=None, + z=None, + t=None, ): URLImage.__init__( self, @@ -27,6 +29,8 @@ def __init__( channel=channel, volume=volume, spacing=spacing, + z=z, + t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_url_image.py b/cellprofiler_core/image/abstract_image/file/url/_url_image.py index 0486caec..fe21b724 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_url_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_url_image.py @@ -1,6 +1,8 @@ import os from .....utilities.pathname import url2pathname +from .....utilities.image import is_omero3d_path + from .._file_image import FileImage @@ -17,6 +19,8 @@ def __init__( channel=None, volume=False, spacing=None, + z=None, + t=None, ): if url.lower().startswith("file:"): path = url2pathname(url) @@ -25,11 +29,17 @@ def __init__( pathname = "" filename = url super(URLImage, self).__init__( - name, pathname, filename, rescale, series, index, channel, volume, spacing + name, pathname, filename, rescale, series, index, channel, volume, spacing, z=z, t=t, ) self.url = url def get_url(self): + if is_omero3d_path(self.url): + print("OMERO-3D URL: {}".format(self.url)) + url = self.url.split("omero-3d:")[1] + if url is not None: + return url + return self.url if self.cache_file(): return super(URLImage, self).get_url() return self.url diff --git a/cellprofiler_core/modules/loaddata.py b/cellprofiler_core/modules/loaddata.py index 75ee9767..cef6c4cd 100644 --- a/cellprofiler_core/modules/loaddata.py +++ b/cellprofiler_core/modules/loaddata.py @@ -1060,6 +1060,33 @@ def fetch_provider(self, name, measurements, is_image_name=True): frame = measurements["Image", frame_feature] else: frame = None + if url.endswith('.zarr'): + # Zarrs need czt indexing rather than just index. 
+ c, z, t = None, None, None + + if measurements.has_feature("Image", f"Channel_{name}"): + c = measurements["Image", f"Channel_{name}"] + elif measurements.has_feature("Image", "Metadata_C"): + c = measurements["Image", "Metadata_C"] + if measurements.has_feature("Image", f"Z_{name}"): + z = measurements["Image", f"Z_{name}"] + elif measurements.has_feature("Image", "Metadata_Z"): + z = measurements["Image", "Metadata_Z"] + if measurements.has_feature("Image", f"T_{name}"): + t = measurements["Image", f"T_{name}"] + elif measurements.has_feature("Image", "Metadata_T"): + t = measurements["Image", "Metadata_T"] + return FileImage( + name, + path, + filename, + rescale=self.rescale.value and is_image_name, + series=series, + index=frame, + z=z, + channel=c, + t=t, + ) return FileImage( name, path, diff --git a/cellprofiler_core/modules/metadata.py b/cellprofiler_core/modules/metadata.py index 5f9019b3..fd1a0aa8 100644 --- a/cellprofiler_core/modules/metadata.py +++ b/cellprofiler_core/modules/metadata.py @@ -1133,6 +1133,7 @@ def msg(url): from bioformats.formatreader import get_omexml_metadata from typing import Optional, Any, Callable from dataclasses import dataclass + from cellprofiler_core.utilities.zarr import get_zarr_metadata @dataclass class ui_context: @@ -1173,6 +1174,7 @@ def update_all_urls(): for i, url in enumerate(urls): try: if not pbar_context.update_callback(pbar_context.dlg, i, url): + break if group.filter_choice == F_FILTERED_IMAGES: match = group.filter.evaluate( @@ -1186,7 +1188,10 @@ def update_all_urls(): continue metadata = filelist.get_metadata(url) if metadata is None: - metadata = get_omexml_metadata(url=url) + if url.lower().endswith('.zarr'): + metadata = get_zarr_metadata(url=url) + else: + metadata = get_omexml_metadata(url=url) filelist.add_metadata(url, metadata) except Exception as e: import logging @@ -1199,7 +1204,6 @@ def update_all_urls(): pbar_context.err_callback(errmsg) update_all_urls() - 
group.metadata_autoextracted.value = True def on_activated(self, workspace): diff --git a/cellprofiler_core/modules/namesandtypes.py b/cellprofiler_core/modules/namesandtypes.py index b922c7dc..f5faacf0 100644 --- a/cellprofiler_core/modules/namesandtypes.py +++ b/cellprofiler_core/modules/namesandtypes.py @@ -1981,7 +1981,20 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): javabridge.call(stack, "get", "([I)Ljava/lang/Object;", coords) ) ) + # CZT needed for zarr reading. Provision of index parameter will make bioformats readers ignore these. + if workspace.measurements.has_feature("Image", "Metadata_Z"): + z = workspace.measurements.get_measurement("Image", "Metadata_Z") + if z is not None: + z = int(z) + else: + z = None + if workspace.measurements.has_feature("Image", "Metadata_T"): + t = workspace.measurements.get_measurement("Image", "Metadata_T") + if t is not None: + t = int(t) + else: + t = None if len(ipds) == 1: interleaved = javabridge.get_static_field( "org/cellprofiler/imageset/ImagePlane", "INTERLEAVED", "I" @@ -1994,6 +2007,14 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): series = ipd.series index = ipd.index channel = ipd.channel + if url.lower().endswith('.zarr') and workspace.measurements.has_feature("Image", "Metadata_C"): + # Override channel index with real value if using zarrs + channelM = workspace.measurements.get_measurement("Image", "Metadata_C") + if channelM is not None: + channel = int(channelM) + else: + # Todo: Solve channel ID if mixed channel pipeline + channel = index if channel == monochrome: channel = None elif channel == interleaved: @@ -2001,7 +2022,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): if index == 0: index = None self.add_simple_image( - workspace, name, load_choice, rescale, url, series, index, channel + workspace, name, load_choice, rescale, url, series, index, channel, z=z, t=t, ) elif all([ipd.url == ipds[0].url for 
ipd in ipds[1:]]): # Can load a simple image with a vector of series/index/channel @@ -2010,7 +2031,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): index = [ipd.index for ipd in ipds] channel = [None if ipd.channel < 0 else ipd.channel for ipd in ipds] self.add_simple_image( - workspace, name, load_choice, rescale, url, series, index, channel + workspace, name, load_choice, rescale, url, series, index, channel, z=z, t=t, ) else: # Different URLs - someone is a clever sadist @@ -2023,7 +2044,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): ) def add_simple_image( - self, workspace, name, load_choice, rescale, url, series, index, channel + self, workspace, name, load_choice, rescale, url, series, index, channel, z=None, t=None, ): m = workspace.measurements @@ -2035,7 +2056,7 @@ def add_simple_image( if load_choice == LOAD_AS_COLOR_IMAGE: provider = ColorImage( - name, url, series, index, rescale, volume=volume, spacing=spacing + name, url, series, index, rescale, volume=volume, spacing=spacing, z=z, t=t, ) elif load_choice == LOAD_AS_GRAYSCALE_IMAGE: provider = MonochromeImage( @@ -2047,14 +2068,16 @@ def add_simple_image( rescale, volume=volume, spacing=spacing, + z=z, + t=t, ) elif load_choice == LOAD_AS_ILLUMINATION_FUNCTION: provider = MonochromeImage( - name, url, series, index, channel, False, volume=volume, spacing=spacing + name, url, series, index, channel, False, volume=volume, spacing=spacing, z=z, t=t, ) elif load_choice == LOAD_AS_MASK: provider = MaskImage( - name, url, series, index, channel, volume=volume, spacing=spacing + name, url, series, index, channel, volume=volume, spacing=spacing, z=z, t=t, ) workspace.image_set.providers.append(provider) diff --git a/cellprofiler_core/preferences/__init__.py b/cellprofiler_core/preferences/__init__.py index 708401c0..5a5be6d7 100644 --- a/cellprofiler_core/preferences/__init__.py +++ b/cellprofiler_core/preferences/__init__.py @@ -82,7 +82,12 
@@ def get_config():
     try:
         config = wx.Config.Get(False)
     except wx.PyNoAppError:
-        app = wx.App(0)
+        try:
+            app = wx.App(0)
+        except SystemExit:
+            # We're probably building on GitHub Actions
+            print("Python version doesn't support GUI, no app available.")
+            return __headless_config
         config = wx.Config.Get(False)
     if not config:
         wx.Config.Set(
diff --git a/cellprofiler_core/utilities/image.py b/cellprofiler_core/utilities/image.py
index 51fa71d1..f2984a4b 100644
--- a/cellprofiler_core/utilities/image.py
+++ b/cellprofiler_core/utilities/image.py
@@ -115,6 +115,9 @@ def is_numpy_file(filename):
 def is_matlab_file(filename):
     return os.path.splitext(filename)[-1].lower() == ".mat"
 
+def is_omero3d_path(url):
+    return url.lower().startswith('omero-3d:')
+
 
 def loadmat(path):
     imgdata = scipy.io.matlab.mio.loadmat(path, struct_as_record=True)
diff --git a/cellprofiler_core/utilities/zarr.py b/cellprofiler_core/utilities/zarr.py
new file mode 100644
index 00000000..bbf07c60
--- /dev/null
+++ b/cellprofiler_core/utilities/zarr.py
@@ -0,0 +1,357 @@
+import collections
+import os
+import re
+import sys
+import tempfile
+import urllib.parse
+from urllib.request import urlopen
+
+import numpy
+import zarr
+import boto3
+import shutil
+
+from cellprofiler_core.utilities.pathname import url2pathname
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+def get_zarr_metadata(url):
+    """Return the OME-XML metadata text for the zarr at `url`.
+
+    `file:` URLs are converted to local paths; `s3:` URLs are read through a
+    presigned request for ``OME/METADATA.ome.xml`` below the store key.
+    Anything else is treated as a local directory, which is searched for
+    ``OME/METADATA.ome.xml`` and then ``METADATA.ome.xml``. If neither file
+    exists the XML is generated with `make_ome_xml`.
+
+    :param url: URL or local path of the zarr store
+    :return: the metadata as a string
+    :raises ValueError: if an `s3:` URL cannot be split into bucket and key
+    """
+    xmlfile = 'METADATA.ome.xml'
+    parser = urllib.parse.urlparse(url)
+    if parser.scheme == 'file':
+        url = url2pathname(url)
+    elif parser.scheme == 's3':
+        client = boto3.client('s3')
+        # Raw string so the regex escapes are not read as string escapes.
+        match = re.search(r's3://([\w\d\-\.]+)/(.*)', url)
+        if match is None:
+            raise ValueError("Unable to parse S3 URL: %s" % url)
+        bucket_name, key = match.groups()
+        key += "/OME/METADATA.ome.xml"
+        url = client.generate_presigned_url(
+            'get_object',
+            Params={'Bucket': bucket_name, 'Key': key.replace("+", " ")}
+        )
+        # Close the HTTP response as soon as the body has been read.
+        with urlopen(url) as src:
+            return src.read().decode()
+    metadata_path = os.path.join(url, "OME", xmlfile)
+    if os.path.exists(metadata_path):
+        with open(metadata_path) as data:
+            return data.read()
+    elif os.path.exists(os.path.join(url, xmlfile)):
+        with open(os.path.join(url, xmlfile)) as data:
+            return data.read()
+    else:
+        logger.warning("Input zarr lacks an OME-XML file. "
+                       "CellProfiler will try to construct metadata, but this feature is experimental")
+        return make_ome_xml(url)
+
+
+def make_ome_xml(url):
+    """Build a stand-in OME-XML string by walking the zarr hierarchy at `url`.
+
+    Groups are visited breadth-first and only the first array of each group
+    is described.
+
+    NOTE(review): the XML string literals below carry no markup or
+    placeholders in this copy of the patch; they look as if their tags were
+    lost in transit. Confirm against the upstream file before relying on
+    the output.
+    """
+    # Prototype zarr parser to construct a fake OME XML.
+    root = zarr.open(url, mode='r')
+    queue = collections.deque([root])
+    xmlstr = """"""
+    while queue:
+        subject = queue.popleft()
+        for loc, group in subject.groups():
+            queue.append(group)
+        for loc, array in subject.arrays():
+            t, c, z, y, x = array.shape
+            dtype = array.dtype.name
+            if dtype == "int64":
+                # OME-XML can't handle int64, consider as a float instead.
+                dtype = "float"
+            xmlstr += \
+                f"""UnknownCellProfiler OME Metadata """
+            for i in range(c):
+                xmlstr += f""""""
+            xmlstr += """ """
+            # We only want the first resolution
+            break
+    return xmlstr + ""
+
+
+def get_zarr_reader(key, path=None, url=None):
+    """Return a `ZarrReader` for `path`/`url`, cached under `key`.
+
+    The reader caches of `bioformats.formatreader` are shared; a key that
+    was registered for a different location is released first.
+    """
+    logger.debug("Getting image reader for: %s, %s, %s" % (key, path, url))
+    from bioformats.formatreader import __image_reader_key_cache, __image_reader_cache, release_image_reader
+    if key in __image_reader_key_cache:
+        old_path, old_url = __image_reader_key_cache[key]
+        old_count, rdr = __image_reader_cache[old_path, old_url]
+        if old_path == path and old_url == url:
+            return rdr
+        release_image_reader(key)
+    if (path, url) in __image_reader_cache:
+        old_count, rdr = __image_reader_cache[path, url]
+    else:
+        rdr = ZarrReader(path, url)
+        old_count = 0
+    __image_reader_cache[path, url] = (old_count + 1, rdr)
+    __image_reader_key_cache[key] = (path, url)
+
+    return rdr
+
+
+class ZarrReader(object):
+    """Reads image planes from a zarr store on disk or on S3."""
+
+    def __init__(self, path=None, url=None, perform_init=True):
+        """Open the zarr store at `path` or `url` and index its series.
+
+        :param path: location of the zarr store
+        :param url: URL of the store; a ``file:`` URL is converted to a path
+            and overrides `path`, any other URL is used when `path` is None
+        :param perform_init: not used by this reader
+        :raises ValueError: if neither `path` nor `url` is supplied
+        :raises IOError: if a non-S3 location is not an existing directory
+        """
+        self.stream = None
+        file_scheme = "file:"
+
+        self.using_temp_file = False
+
+        if url is not None:
+            url = str(url)
+            if url.lower().startswith(file_scheme):
+                url = url2pathname(url)
+                path = url
+            elif path is None:
+                path = url
+
+        if path is None:
+            # path is only still None when url is None too; that case used to
+            # end in an AttributeError on url.lower().
+            raise ValueError("ZarrReader requires a path or a url")
+
+        self.path = path
+        # Test the URL scheme, not a bare "s3" prefix, so that a local folder
+        # whose name starts with "s3" is not mistaken for a bucket.
+        if urllib.parse.urlparse(path).scheme == 's3':
+            logger.info("Zarr is stored on S3, will try to read directly.")
+            store = zarr.storage.FSStore(self.path)
+            if '.zmetadata' in store:
+                # Zarr has consolidated metadata.
+                self.reader = zarr.convenience.open_consolidated(store, mode='r')
+            else:
+                logger.warning(f"Image is on S3 but lacks consolidated metadata. "
+                               f"This may degrade reading performance. URL: {path}")
+                self.reader = zarr.open(store, mode='r')
+        else:
+            # Only local paths get native separators; URLs keep their "/".
+            if sys.platform.startswith("win"):
+                self.path = self.path.replace("/", os.path.sep)
+            if not os.path.isdir(self.path):
+                raise IOError("The file, \"%s\", does not exist." % path)
+            self.reader = zarr.open(zarr.storage.FSStore(self.path), mode='r')
+        self.well_map = self.map_wells()
+        self.series_list = self.map_series()
+
+    def read(self, c=None, z=None, t=None, series=None, index=None, rescale=True, wants_max_intensity=True, channel_names=None, XYWH=None):
+        """Read a single plane from the image reader file.
+        :param c: read from this channel. `None` = read color image if multichannel
+            or interleaved RGB.
+        :param z: z-stack index
+        :param t: time index
+        :param series: series to read; `None` reads the first series
+        :param index: only logged by this reader; planes are selected with c/z/t
+        :param rescale: `True` to rescale the intensity scale to 0 and 1; `False` to
+                  return the raw values native to the file.
+        :param wants_max_intensity: if `False`, only return the image; if `True`,
+                  return a tuple of image and max intensity
+        :param channel_names: not used by this reader
+        :param XYWH: a (x, y, w, h) tuple"""
+        # Index should always be None, we need ctz to properly index zarrs.
+        logger.debug(f"Reading {c=}, {z=}, {t=}, {series=}, {index=}, {XYWH=}")
+        if series is None:
+            # None cannot index series_list; default to the first series.
+            series = 0
+        c2 = None if c is None else c + 1
+        z2 = None if z is None else z + 1
+        t2 = None if t is None else t + 1
+        if XYWH is not None:
+            x, y, w, h = XYWH
+            x = round(x)
+            y = round(y)
+            x2 = x + w
+            y2 = y + h
+        else:
+            y, y2, x, x2 = None, None, None, None
+        if self.well_map:
+            series_col, series_row, series_field = self.series_list[series]
+            base_path = self.well_map[(series_col, series_row)]
+            seriesreader = self.reader[base_path]
+            field = seriesreader.attrs['well']['images'][series_field]['path']
+            # Hard-coding resolution 0 for now
+            seriesreader = seriesreader[field][0]
+        else:
+            seriesreader = self.reader[self.series_list[series]][0]
+        # Zarr arrays are indexed as TCZYX
+        if len(seriesreader.shape) == 5:
+            image = seriesreader[t:t2, c:c2, z:z2, y:y2, x:x2]
+        else:
+            image = seriesreader[c:c2, z:z2, y:y2, x:x2]
+        # Remove redundant axes
+        image = numpy.squeeze(image)
+        # C needs to be the last axis, but z should be first. Thank you CellProfiler.
+        if len(image.shape) > 2 and z is not None:
+            image = numpy.moveaxis(image, 0, -1)
+        elif len(image.shape) > 3:
+            image = numpy.moveaxis(image, 0, -1)
+        # Inspect the stored dtype before converting: a rescaled image is float,
+        # so checking afterwards always reported a max intensity of 1.
+        native_dtype = image.dtype
+        if rescale:
+            if numpy.issubdtype(native_dtype, numpy.integer):
+                image = image.astype(float) / numpy.iinfo(native_dtype).max
+            else:
+                # numpy.iinfo() raises on non-integer dtypes, so only convert.
+                image = image.astype(float)
+        if wants_max_intensity:
+            if native_dtype in [numpy.int8, numpy.uint8]:
+                scale = 255
+            elif native_dtype in [numpy.int16, numpy.uint16]:
+                scale = 65535
+            elif native_dtype == numpy.int32:
+                scale = 2 ** 32 - 1
+            elif native_dtype == numpy.uint32:
+                scale = 2 ** 32
+            else:
+                scale = 1
+            return image, scale
+        return image
+
+    def map_wells(self):
+        """Map well positions to well paths for HCS zarrs.
+
+        :return: a dict keyed by a pair of strings, or `False` when the store
+            has no plate/well metadata
+        """
+        # For HCS zarrs, we construct a dictionary mapping well positions to array directories.
+        attrs = self.reader.attrs
+        if 'plate' not in attrs or 'wells' not in attrs['plate']:
+            return False
+        well_data = attrs['plate']['wells']
+        if not well_data:
+            # No wells to map, and well_data[0] below would raise.
+            return False
+        mapper = {}
+        if 'column_index' in well_data[0]:
+            # Standard format
+            for well in well_data:
+                mapper[(str(well['column_index']), str(well['row_index']))] = well['path']
+        else:
+            for well in well_data:
+                path = well['path']
+                col, row = path.split('/', 1)
+                mapper[(str(col), str(row))] = path
+        return mapper
+
+    def map_series(self):
+        """List the series held by the store.
+
+        :return: (Column, Row, field index) tuples when a well map exists,
+            otherwise the name of the first array found in each group
+        """
+        # If in HCS mode we produce a list of (Column, Row, FieldNum) tuples to use with the well map.
+        # If in non-HCS mode we just make a list of paths to each series.
+        series_list = []
+        if self.well_map:
+            metadata = get_zarr_metadata(self.path)
+            from lxml import etree
+            import io
+            context = etree.iterparse(io.BytesIO(metadata.encode()), tag="{*}ImageRef")
+            for action, node in context:
+                wellsample = node.getparent()
+                well = wellsample.getparent()
+                series_list.append((well.get('Column'), well.get('Row'), well.getchildren().index(wellsample)))
+                node.clear()
+            if not series_list:
+                # No series were found, try constructing from the image tags.
+                context = etree.iterparse(io.BytesIO(metadata.encode()), tag="{*}Image")
+                for action, node in context:
+                    imagepath = node.attrib["Name"]
+                    parts = imagepath.split('/')
+                    series_list.append((parts[1], parts[2], int(parts[3])))
+                    node.clear()
+        else:
+            # No well metadata, just fetch series in order.
+            queue = collections.deque()
+            queue.append(self.reader)
+            while queue:
+                subject = queue.popleft()
+                for loc, group in subject.groups():
+                    queue.append(group)
+                for loc, array in subject.arrays():
+                    series_list.append(array.name)
+                    # We only want the first resolution
+                    break
+        return series_list
+
+    def download(self, url):
+        """Copy the resource at `url` into a temporary file.
+
+        On success `self.path` names the temporary file and
+        `self.using_temp_file` is True.
+
+        :param url: URL to fetch
+        :return: base name of the URL's path component
+        """
+        # Cloned from bioformats' reader. Should temporarily download URLs.
+        # No idea if this will work since zarr is a directory-based format.
+        urlpath = urllib.parse.urlparse(url)[2]
+        ext = url[url.rfind("."):]
+        # self.path is not a usable source for the name here (it is about to
+        # be replaced by the temp file), so take it from the URL path.
+        filename = os.path.basename(urlpath)
+
+        src = urlopen(url)
+        try:
+            dest_fd, self.path = tempfile.mkstemp(suffix=ext)
+            self.using_temp_file = True
+            try:
+                with os.fdopen(dest_fd, 'wb') as dest:
+                    shutil.copyfileobj(src, dest)
+            except Exception:
+                # Remove the partial file, then let the caller see the error
+                # instead of silently returning after a failed download.
+                os.remove(self.path)
+                self.using_temp_file = False
+                raise
+        finally:
+            src.close()
+
+        return filename
+
+    def close(self):
+        # Zarr readers don't need to be explicitly closed.
+        pass