Skip to content

Commit e84e1c2

Browse files
authored
Merge pull request #10 from janelia-cellmap/refactor
Added tests for TIFF and MRC chunk writing with input arrays of different dimensionality; modified the code to make sure the chunk dimensions align with the shape of the input data.
2 parents 8c8694c + 336fea1 commit e84e1c2

File tree

7 files changed

+101
-4
lines changed

7 files changed

+101
-4
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ dist/
66
.zarrify_venv
77
.python-version
88
*.egg-info/
9-
build/
9+
build/
10+
.vscode/

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ dependencies = [
2929
"numcodecs>=0.13.0,<0.16",
3030
"pydantic-zarr>=0.4.0,<0.5.0",
3131
"pint>=0.20.0,<1.0.0",
32-
"tifffile>=2025.1.10,<2026.0.0",
32+
"tifffile>=2024.1.0,<2025.5.10",
3333
"zarr==2.18.3",
3434
"bokeh>=3.1.0"
3535
]

src/zarrify/formats/mrc.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from numcodecs import Zstd
1212
import logging
1313
import copy
14+
import numpy as np
1415

1516
class Mrc3D(Volume):
1617

@@ -31,8 +32,14 @@ def __init__(
3132

3233
self.memmap = mrcfile.mmap(self.src_path, mode="r")
3334
self.ndim = self.memmap.data.ndim
34-
self.shape = self.memmap.data.shape
35+
self.shape = np.squeeze(self.memmap.data.shape)
3536
self.dtype = self.memmap.data.dtype
37+
38+
# Scale metadata parameters to match data dimensionality
39+
self.metadata["axes"] = self.metadata["axes"][-self.ndim:]
40+
self.metadata["scale"] = self.metadata["scale"][-self.ndim:]
41+
self.metadata["translation"] = self.metadata["translation"][-self.ndim:]
42+
self.metadata["units"] = self.metadata["units"][-self.ndim:]
3643

3744

3845
def write_to_zarr(
@@ -53,6 +60,10 @@ def write_to_zarr(
5360
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
5461

5562
src_path = copy.copy(self.src_path)
63+
64+
if len(zarr_chunks) != len(self.shape):
65+
zarr_chunks = self.reshape_to_arr_shape(zarr_chunks, self.shape)
66+
5667
z_arr = self.get_output_array(dest, zarr_chunks, comp)
5768

5869
out_slices = slices_from_chunks(

src/zarrify/formats/tiff.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ def __init__(
3434

3535
self.shape = self.zarr_arr.shape
3636
self.dtype = self.zarr_arr.dtype
37+
self.ndim = self.zarr_arr.ndim
38+
39+
# Scale metadata parameters to match data dimensionality
40+
self.metadata["axes"] = self.metadata["axes"][-self.ndim:]
41+
self.metadata["scale"] = self.metadata["scale"][-self.ndim:]
42+
self.metadata["translation"] = self.metadata["translation"][-self.ndim:]
43+
self.metadata["units"] = self.metadata["units"][-self.ndim:]
3744

3845
def write_to_zarr(self,
3946
dest: str,
@@ -42,6 +49,10 @@ def write_to_zarr(self,
4249
comp : ABCMeta = Zstd(level=6),
4350
):
4451

52+
# reshape chunk shape to align with arr shape
53+
if len(zarr_chunks) != self.shape:
54+
zarr_chunks = self.reshape_to_arr_shape(zarr_chunks, self.shape)
55+
4556
z_arr = self.get_output_array(dest, zarr_chunks, comp)
4657
chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
4758

src/zarrify/formats/tiff_stack.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,14 @@ def __init__(
3737
probe_image_arr = da.from_zarr(probe_image_store)
3838

3939
self.dtype = probe_image_arr.dtype
40-
self.shape = [len(self.stack_list)] + list(probe_image_arr.shape)
40+
self.shape = np.squeeze([len(self.stack_list)] + list(probe_image_arr.shape))
41+
self.ndim = len(self.shape)
42+
43+
# Scale metadata parameters to match data dimensionality
44+
self.metadata["axes"] = self.metadata["axes"][-self.ndim:]
45+
self.metadata["scale"] = self.metadata["scale"][-self.ndim:]
46+
self.metadata["translation"] = self.metadata["translation"][-self.ndim:]
47+
self.metadata["units"] = self.metadata["units"][-self.ndim:]
4148

4249
def write_tile_slab_to_zarr(
4350
self, chunk_num: int, zarray: zarr.Array, src_volume: list
@@ -73,6 +80,10 @@ def write_to_zarr(self,
7380
comp : ABCMeta = Zstd(level=6),
7481
):
7582

83+
# reshape chunk shape to align with arr shape
84+
if len(zarr_chunks) != len(self.shape):
85+
zarr_chunks = self.reshape_to_arr_shape(zarr_chunks, self.shape)
86+
7687
z_arr = self.get_output_array(dest, zarr_chunks, comp)
7788
chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
7889

src/zarrify/utils/volume.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ def get_output_array(self, dest: str, chunks: list[int], comp: ABCMeta) -> zarr.
3131
chunks=chunks,
3232
compressor=comp,
3333
)
34+
35+
def reshape_to_arr_shape(self, param_arr, ref_arr):
    """Fit ``param_arr`` to the length of ``ref_arr`` by cycling its values.

    The values of ``param_arr`` are repeated in order, wrapping around to
    the start when exhausted, until as many values as ``ref_arr`` has
    entries have been collected. A longer ``param_arr`` is truncated.

    Args:
        param_arr: iterable of values to resize (e.g. a chunk specification).
        ref_arr: sized object whose ``len`` gives the target length
            (e.g. an array shape).

    Returns:
        A new list of ``len(ref_arr)`` values; empty if ``param_arr`` is
        empty.
    """
    import itertools

    target_len = len(ref_arr)
    repeated = itertools.cycle(param_arr)
    return list(itertools.islice(repeated, target_len))
3438

3539
def add_ome_metadata(self, dest: str):
3640
"""Add selected tiff metadata to zarr attributes file (.zattrs).

tests/test_to_zarr.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
5+
import mrcfile
6+
import tifffile
7+
import zarr
8+
import numpy as np
9+
import os
10+
11+
from zarrify.utils.dask_utils import initialize_dask_client
12+
from zarrify import to_zarr
13+
14+
@pytest.fixture
def create_test_file(tmp_path):
    """Factory fixture that writes a random test image to ``tmp_path``.

    Returns a callable ``_create_file(ext, shape)`` which writes a ``uint8``
    array of the requested shape as a TIFF (``"tif"``/``"tiff"``) or MRC
    (``"mrc"``) file and returns ``(file_path, data)``. Any other extension
    raises ``ValueError``.
    """
    def _create_file(ext: str, shape: tuple):
        # Draw integers directly: ``np.random.rand`` samples floats from
        # [0, 1), which all truncate to 0 when cast to uint8. All-zero data
        # would let the equality checks pass against any all-zero output.
        # A lower bound of 1 keeps every element non-zero.
        data = np.random.randint(1, 256, size=shape, dtype=np.uint8)
        file_path = tmp_path / f"test_image_{len(shape)}d.{ext}"

        if ext in ("tiff", "tif"):
            tifffile.imwrite(file_path, data)
        elif ext == "mrc":
            with mrcfile.new(file_path, overwrite=True) as mrc:
                mrc.set_data(data)
        else:
            raise ValueError("Unsupported file format")

        return file_path, data
    return _create_file
31+
32+
import itertools
33+
34+
# Test parameters
35+
FORMATS = ['tif', 'tiff', 'mrc']
36+
SHAPES = [(40, 50), (1, 30, 50)] # 2D and 3D
37+
38+
@pytest.mark.parametrize("ext,shape", list(itertools.product(FORMATS, SHAPES)))
def test_to_zarr(create_test_file, ext, shape):
    """Convert a generated TIFF/MRC file to zarr and compare the contents.

    The array read back from the ``s0`` dataset of the output container must
    equal both the data re-read from the source file and the in-memory array
    the source file was created from.
    """
    src_path, expected_data = create_test_file(ext, shape)
    dest_path = Path(f"{src_path.with_suffix('')}.zarr")

    # Run the conversion on a local dask client.
    dask_client = initialize_dask_client('local')
    to_zarr(src_path, dest_path, dask_client)

    # Re-read the source file with the reader matching its extension.
    suffix = src_path.suffix.lstrip('.')
    if suffix == 'mrc':
        with mrcfile.open(src_path, permissive=True) as mrc:
            src_data = mrc.data
    elif suffix in ['tif', 'tiff']:
        src_data = tifffile.imread(src_path)

    # The converted array is stored in s0 by convention.
    dest_data = zarr.open(f'{dest_path}/s0', mode='r')
    written = dest_data[:]
    assert np.array_equal(written, src_data)
    assert np.array_equal(written, expected_data)
59+

0 commit comments

Comments
 (0)