Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extendable zarr arrays #802

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d135018
add parameter to from_uri
dylanmcreynolds Oct 16, 2024
fa5b52c
growable zarr array
dylanmcreynolds Oct 26, 2024
d48d2b7
pass max_depth to server
dylanmcreynolds Oct 26, 2024
a8cc533
revert max_shaape
dylanmcreynolds Oct 26, 2024
b72b0bd
checkpoint
dylanmcreynolds Oct 26, 2024
2974eda
checkpoint
dylanmcreynolds Oct 27, 2024
7585eb0
Add endpoint link for append
dylanmcreynolds Oct 27, 2024
ef977db
fix errant code in write_block
dylanmcreynolds Oct 28, 2024
6bd0d80
Add append link to arrays, client sends shape and axis
dylanmcreynolds Oct 29, 2024
2688612
zarr adapter is called for write_block
dylanmcreynolds Oct 29, 2024
02bac70
fix growth of zarr array
dylanmcreynolds Oct 29, 2024
bc9f96d
update database for size in array append_block
dylanmcreynolds Oct 29, 2024
919a2dd
precommit cleanup
dylanmcreynolds Oct 29, 2024
9e54e8a
fix test
dylanmcreynolds Oct 29, 2024
a49ca2a
fix database issue
dylanmcreynolds Oct 30, 2024
bc7f79a
add client call docstring
dylanmcreynolds Oct 30, 2024
325abdb
WIP: Move type_aliases to root and fix misspelled name
danielballan Oct 31, 2024
a20238b
WIP: Rework to use PATCH /array/full
danielballan Oct 31, 2024
6e292a6
Appending works
danielballan Oct 31, 2024
ab5235d
Rename 'grow' to 'extend'.
danielballan Oct 31, 2024
5d4fe9a
Raise if assumptions are not met
danielballan Oct 31, 2024
11503a6
Improve usability. Test.
danielballan Oct 31, 2024
3479d42
Update imports after rebase.
danielballan Oct 31, 2024
5878d5e
Use Python 3.9 compatibile typing.
danielballan Oct 31, 2024
b8b9f81
Fix regression in refresh
danielballan Oct 31, 2024
f6ffb8a
Reference documentation
danielballan Oct 31, 2024
5542657
Add example to docstring.
danielballan Nov 1, 2024
4ad9e5f
Test overwrite and out-of-order updates.
danielballan Nov 1, 2024
0f053c7
Change from 'slice' to 'offset'.
danielballan Nov 5, 2024
8a8ffb6
update array test
dylanmcreynolds Oct 31, 2024
7a8b86b
add extend array to writing tutorial
dylanmcreynolds Nov 8, 2024
771037f
whitespace
dylanmcreynolds Nov 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/reference/python-client.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ Tiled currently includes two clients for each structure family:
tiled.client.array.DaskArrayClient.export
tiled.client.array.DaskArrayClient.write
tiled.client.array.DaskArrayClient.write_block
tiled.client.array.DaskArrayClient.patch
```

```{eval-rst}
Expand Down
13 changes: 13 additions & 0 deletions docs/source/tutorials/writing.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ Write array and tabular data.
>>> client.write_array(numpy.array([4, 5, 6]), metadata={"color": "blue", "barcode": 11})
<ArrayClient shape=(3,) chunks=((3,),) dtype=int64>

# Create an array and grow it by one
>>> new_array = client.write_array([1, 2, 3])
>>> new_array
<ArrayClient shape=(3,) chunks=((3,),) dtype=int64>

# Extend the array. This array has only one dimension; here we extend it
# by one along that dimension.
>>> new_array.patch([4], offset=(3,), extend=True)
>>> new_array
<ArrayClient shape=(4,) chunks=((3, 1),) dtype=int64>
>>> new_array.read()
array([1, 2, 3, 4])

# Write a table (DataFrame).
>>> import pandas
>>> client.write_dataframe(pandas.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}), metadata={"color": "green", "barcode": 12})
Expand Down
2 changes: 1 addition & 1 deletion tiled/_tests/test_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
SparseAdapter,
TableAdapter,
)
from tiled.adapters.type_alliases import JSON, Filters, NDSlice, Scopes
from tiled.server.schemas import Principal, PrincipalType
from tiled.structures.array import ArrayStructure, BuiltinDtype
from tiled.structures.awkward import AwkwardStructure
from tiled.structures.core import Spec, StructureFamily
from tiled.structures.sparse import COOStructure
from tiled.structures.table import TableStructure
from tiled.type_aliases import JSON, Filters, NDSlice, Scopes


class CustomArrayAdapter:
Expand Down
49 changes: 49 additions & 0 deletions tiled/_tests/test_writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,55 @@ def test_write_array_chunked(tree):
assert result.specs == specs


def test_extend_array(tree):
"Extend an array with additional data, expanding its shape."
with Context.from_app(
build_app(tree, validation_registry=validation_registry)
) as context:
client = from_context(context)

a = numpy.ones((3, 2, 2))
new_data = numpy.ones((1, 2, 2)) * 2
full_array = numpy.concatenate((a, new_data), axis=0)

# Upload a (3, 2, 2) array.
ac = client.write_array(a)
assert ac.shape == a.shape

# Patching data into a region beyond the current extent of the array
# raises a ValueError (catching a 409 from the server).
with pytest.raises(ValueError):
ac.patch(new_data, offset=(3,))
# With extend=True, the array is expanded.
ac.patch(new_data, offset=(3,), extend=True)
# The local cache of the structure is updated.
assert ac.shape == full_array.shape
actual = ac.read()
# The array has the expected shape and data.
assert actual.shape == full_array.shape
numpy.testing.assert_equal(actual, full_array)

# Overwrite data (do not extend).
revised_data = numpy.ones((1, 2, 2)) * 3
revised_array = full_array.copy()
revised_array[3, :, :] = 3
ac.patch(revised_data, offset=(3,))
numpy.testing.assert_equal(ac.read(), revised_array)

# Extend out of order.
ones = numpy.ones((1, 2, 2))
ac.patch(ones * 7, offset=(7,), extend=True)
ac.patch(ones * 5, offset=(5,), extend=True)
ac.patch(ones * 6, offset=(6,), extend=True)
numpy.testing.assert_equal(ac[5:6], ones * 5)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better to use something like rng.random instead of ones here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, that would avoid rotational confusion.

numpy.testing.assert_equal(ac[6:7], ones * 6)
numpy.testing.assert_equal(ac[7:8], ones * 7)

# Offset given as an int is acceptable.
ac.patch(ones * 8, offset=8, extend=True)
numpy.testing.assert_equal(ac[8:9], ones * 8)


def test_write_dataframe_full(tree):
with Context.from_app(
build_app(tree, validation_registry=validation_registry)
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

from ..structures.array import ArrayStructure
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON, NDSlice
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


class ArrayAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from ..structures.core import Spec, StructureFamily
from ..structures.data_source import Asset, DataSource, Management
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import ensure_uri, path_from_uri
from .array import ArrayAdapter
from .protocols import AccessPolicy
from .type_alliases import JSON


class ArrowAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/awkward.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

from ..structures.awkward import AwkwardStructure
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON
from .awkward_directory_container import DirectoryContainer
from .protocols import AccessPolicy
from .type_alliases import JSON


class AwkwardAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/awkward_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from ..server.schemas import Asset
from ..structures.awkward import AwkwardStructure
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON
from ..utils import path_from_uri
from .awkward import AwkwardAdapter
from .awkward_directory_container import DirectoryContainer
from .protocols import AccessPolicy
from .type_alliases import JSON


class AwkwardBuffersAdapter(AwkwardAdapter):
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from ..structures.core import Spec, StructureFamily
from ..structures.data_source import Asset, DataSource, Management
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import ensure_uri, path_from_uri
from .array import ArrayAdapter
from .dataframe import DataFrameAdapter
from .protocols import AccessPolicy
from .table import TableAdapter
from .type_alliases import JSON


def read_csv(
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
from ..structures.array import ArrayStructure
from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import node_repr, path_from_uri
from .array import ArrayAdapter
from .protocols import AccessPolicy
from .resource_cache import with_resource_cache
from .type_alliases import JSON

SWMR_DEFAULT = bool(int(os.getenv("TILED_HDF5_SWMR_DEFAULT", "0")))
INLINED_DEPTH = int(os.getenv("TILED_HDF5_INLINED_CONTENTS_MAX_DEPTH", "7"))
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/jpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

from ..structures.array import ArrayStructure, BuiltinDtype
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .resource_cache import with_resource_cache
from .sequence import FileSequenceAdapter
from .type_alliases import JSON, NDSlice


class JPEGAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@
from ..server.schemas import SortingItem
from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import UNCHANGED, Sentinel
from .protocols import AccessPolicy, AnyAdapter
from .type_alliases import JSON
from .utils import IndexersMixin

if sys.version_info < (3, 9):
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from ..server.schemas import Asset
from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import path_from_uri
from .dataframe import DataFrameAdapter
from .protocols import AccessPolicy
from .type_alliases import JSON


class ParquetDatasetAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from ..structures.core import Spec, StructureFamily
from ..structures.sparse import SparseStructure
from ..structures.table import TableStructure
from ..type_aliases import JSON, Filters, NDSlice, Scopes
from .awkward_directory_container import DirectoryContainer
from .type_alliases import JSON, Filters, NDSlice, Scopes


class BaseAdapter(Protocol):
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

from ..structures.array import ArrayStructure, BuiltinDtype
from ..structures.core import Spec
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


def force_reshape(arr: np.array, desired_shape: Tuple[int, ...]) -> np.array:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

from ..structures.core import Spec, StructureFamily
from ..structures.sparse import COOStructure
from ..type_aliases import JSON, NDSlice
from .array import slice_and_shape_from_block_and_chunks
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


class COOAdapter:
Expand Down
19 changes: 7 additions & 12 deletions tiled/adapters/sparse_blocks_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
from ..server.schemas import Asset
from ..structures.core import Spec, StructureFamily
from ..structures.sparse import COOStructure
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


def load_block(uri: str) -> Tuple[List[int], Tuple[NDArray[Any], Any]]:
Expand Down Expand Up @@ -113,18 +113,13 @@ def write_block(
self,
data: Union[dask.dataframe.DataFrame, pandas.DataFrame],
block: Tuple[int, ...],
slice: NDSlice = ...,
) -> None:
"""

Parameters
----------
data :
block :

Returns
-------

"""
if slice != ...:
raise NotImplementedError(
"Writing into a slice of a sparse block is not yet supported."
)
"Write into a block of the array."
uri = self.blocks[block]
data.to_parquet(path_from_uri(uri))

Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from .array import ArrayAdapter
from .protocols import AccessPolicy
from .type_alliases import JSON


class TableAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

from ..structures.array import ArrayStructure, BuiltinDtype
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .resource_cache import with_resource_cache
from .sequence import FileSequenceAdapter
from .type_alliases import JSON, NDSlice


class TiffAdapter:
Expand Down
Loading
Loading