diff --git a/Cargo.lock b/Cargo.lock index 20c5105e..72f91906 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1197,7 +1197,7 @@ dependencies = [ [[package]] name = "icechunk" -version = "0.1.0-alpha.4" +version = "0.1.0-alpha.5" dependencies = [ "async-recursion", "async-stream", @@ -1233,7 +1233,7 @@ dependencies = [ [[package]] name = "icechunk-python" -version = "0.1.0-alpha.4" +version = "0.1.0-alpha.5" dependencies = [ "async-stream", "bytes", diff --git a/docs/docs/icechunk-python/configuration.md b/docs/docs/icechunk-python/configuration.md index ebbb5d92..6f79bb0d 100644 --- a/docs/docs/icechunk-python/configuration.md +++ b/docs/docs/icechunk-python/configuration.md @@ -17,7 +17,7 @@ When using Icechunk with s3 compatible storage systems, credentials must be prov With this option, the credentials for connecting to S3 are detected automatically from your environment. This is usually the best choice if you are connecting from within an AWS environment (e.g. from EC2). [See the API](./reference.md#icechunk.StorageConfig.s3_from_env) - + ```python icechunk.StorageConfig.s3_from_env( bucket="icechunk-test", @@ -28,7 +28,7 @@ When using Icechunk with s3 compatible storage systems, credentials must be prov === "Provide credentials" With this option, you provide your credentials and other details explicitly. [See the API](./reference.md#icechunk.StorageConfig.s3_from_config) - + ```python icechunk.StorageConfig.s3_from_config( bucket="icechunk-test", @@ -49,7 +49,7 @@ When using Icechunk with s3 compatible storage systems, credentials must be prov With this option, you connect to S3 anonymously (without credentials). This is suitable for public data. [See the API](./reference.md#icechunk.StorageConfig.s3_anonymous) - + ```python icechunk.StorageConfig.s3_anonymous( bucket="icechunk-test", @@ -153,8 +153,8 @@ Now we can now create or open an Icechunk store using our config. ) store = icechunk.IcechunkStore.create( - storage=storage, - mode="w", + storage=storage, + read_only=False, ) ``` @@ -167,8 +167,8 @@ Now we can now create or open an Icechunk store using our config. ) store = icechunk.IcechunkStore.create( - storage=storage, - mode="w", + storage=storage, + read_only=False, ) ``` @@ -188,8 +188,8 @@ Now we can now create or open an Icechunk store using our config. ) store = icechunk.IcechunkStore.open_existing( - storage=storage, - mode="r+", + storage=storage, + read_only=False, config=config, ) ``` @@ -204,11 +204,34 @@ Now we can now create or open an Icechunk store using our config. store = icechunk.IcechunkStore.open_existing( storage=storage, - mode='r+', + read_only=False, config=config, ) ``` -#### Access Mode +#### Read Only Mode + +Note that in all of the above examples, a `read_only` flag is provided to set the access level for the store. This flag controls whether the store is opened in read only mode. When the store is marked read only, no write operations are allowed; attempting one will raise a `ValueError`.
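+For example, a store opened with `read_only=True` rejects writes. A minimal sketch of handling this (it reuses the `storage` and `config` objects from the examples above; the attribute name is only illustrative):
+
+```python
+import zarr
+
+store = icechunk.IcechunkStore.open_existing(
+    storage=storage,
+    read_only=True,
+    config=config,
+)
+
+root_group = zarr.open_group(store=store)
+try:
+    root_group.attrs["updated"] = True  # any write operation
+except ValueError as err:
+    print(err)  # e.g. "store error: cannot write to read-only store"
+```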
+ +It is possible to make a read only store writeable and vice versa: + +```python +# Store is opened writeable +store = icechunk.IcechunkStore.open_existing( + storage=storage, + read_only=False, + config=config, +) + +# Change in place to read_only +store.set_read_only() + +# Open another instance of the store that is writeable +writeable_store = store.as_writeable() + +# Open another read only instance of the store +another_store = writeable_store.as_read_only() -Note that in all of the above examples, a `mode` is provided to instruct the access level of the user to the store. This mode instructs whether the store should be opened in read only mode, and the store should start with a clean slate (although Icechunk prevents the possibility of accidentally overwriting any data that was previously comimtted to the store forever). For more about the access modes, see the [`zarr-python` docs](https://zarr.readthedocs.io/en/v3/_autoapi/zarr/abc/store/index.html#zarr.abc.store.AccessMode). +# Set it writeable in place +another_store.set_writeable() +``` diff --git a/icechunk-python/Cargo.toml b/icechunk-python/Cargo.toml index 37441c3b..f3e3c740 100644 --- a/icechunk-python/Cargo.toml +++ b/icechunk-python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icechunk-python" -version = "0.1.0-alpha.4" +version = "0.1.0-alpha.5" description = "Transactional storage engine for Zarr designed for use on cloud object storage" readme = "../README.md" repository = "https://github.com/earth-mover/icechunk" diff --git a/icechunk-python/notebooks/demo-dummy-data.ipynb b/icechunk-python/notebooks/demo-dummy-data.ipynb index a4fc771f..0c297f54 100644 --- a/icechunk-python/notebooks/demo-dummy-data.ipynb +++ b/icechunk-python/notebooks/demo-dummy-data.ipynb @@ -43,7 +43,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -181,7 +181,7 @@ { "data": { "text/plain": [ - "'M419JDES7SDXBA6NCT4G'" + "'P21V8KTYGB4TZ0HHW5PG'" ] }, "execution_count": 6, @@ -230,7 +230,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "(('root-foo', /root-foo shape=(5, 5, 64, 128) dtype=int32>),)\n" + "(('root-foo', /root-foo shape=(5, 5, 64, 128) dtype=int32>),)\n" ] } ], @@ -240,67 +240,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "59254f22-48f2-4629-aec8-546cb12e8f02", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"shape\": [\n", - " 5,\n", - " 5,\n", - " 64,\n", - " 128\n", - " ],\n", - " \"data_type\": \"int32\",\n", - " \"chunk_grid\": {\n", - " \"name\": \"regular\",\n", - " \"configuration\": {\n", - " \"chunk_shape\": [\n", - " 1,\n", - " 2,\n", - " 8,\n", - " 2\n", - " ]\n", - " }\n", - " },\n", - " \"chunk_key_encoding\": {\n", - " \"name\": \"default\",\n", - " \"configuration\": {\n", - " \"separator\": \"/\"\n", - " }\n", - " },\n", - " \"fill_value\": -1,\n", - " \"codecs\": [\n", - " {\n", - " \"name\": \"bytes\",\n", - " \"configuration\": {\n", - " \"endian\": \"little\"\n", - " }\n", - " }\n", - " ],\n", - " \"dimension_names\": [\n", - " \"x\",\n", - " \"y\",\n", - " \"z\",\n", - " \"t\"\n", - " ],\n", - " \"attributes\": {\n", - " \"description\": \"icechunk test data\"\n", - " }\n", - "}\n" - ] - }, { "data": { "text/plain": [ "{'description': 'icechunk test data'}" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -311,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": 
"78b45ec7-ead8-46c5-b553-476abbd2bca4", "metadata": {}, "outputs": [], @@ -321,17 +271,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "2399312c-d53f-443f-8be1-b8702ba6513e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'V3SFRWRM255Z3JC3SYH0'" + "'AMKMVBPK5BW7DKXNATG0'" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -343,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "edad201d-d9b3-4825-887a-1e6b3bf07e57", "metadata": {}, "outputs": [], @@ -362,15 +312,29 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "id": "d904f719-98cf-4f51-8e9a-1631dcb3fcba", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "store error: all commits must be made on a branch\n" + "ename": "ValueError", + "evalue": "store error: cannot write to read-only store", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m store\u001b[38;5;241m.\u001b[39mcheckout(first_commit)\n\u001b[0;32m----> 2\u001b[0m \u001b[43mroot_group\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mupdate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnew attr 2\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 5\u001b[0m store\u001b[38;5;241m.\u001b[39mcommit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnew attr 2\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/attributes.py:26\u001b[0m, in \u001b[0;36mAttributes.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 24\u001b[0m new_attrs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mattributes)\n\u001b[1;32m 25\u001b[0m new_attrs[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupdate_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnew_attrs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:1594\u001b[0m, in \u001b[0;36mGroup.update_attributes\u001b[0;34m(self, new_attributes)\u001b[0m\n\u001b[1;32m 1593\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mupdate_attributes\u001b[39m(\u001b[38;5;28mself\u001b[39m, new_attributes: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Group:\n\u001b[0;32m-> 1594\u001b[0m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sync\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_async_group\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupdate_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnew_attributes\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1595\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:185\u001b[0m, in \u001b[0;36mSyncMixin._sync\u001b[0;34m(self, coroutine)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_sync\u001b[39m(\u001b[38;5;28mself\u001b[39m, coroutine: Coroutine[Any, Any, T]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 183\u001b[0m \u001b[38;5;66;03m# TODO: refactor this to to take *args and **kwargs and pass those to the method\u001b[39;00m\n\u001b[1;32m 184\u001b[0m \u001b[38;5;66;03m# this should allow us to better type the sync wrapper\u001b[39;00m\n\u001b[0;32m--> 185\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoroutine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43masync.timeout\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:141\u001b[0m, in \u001b[0;36msync\u001b[0;34m(coro, loop, timeout)\u001b[0m\n\u001b[1;32m 138\u001b[0m return_result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(finished))\u001b[38;5;241m.\u001b[39mresult()\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(return_result, \u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[0;32m--> 141\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m return_result\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m return_result\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:100\u001b[0m, in \u001b[0;36m_runner\u001b[0;34m(coro)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;124;03mAwait a coroutine and return the result of running it. 
If awaiting the coroutine raises an\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124;03mexception, the exception will be returned.\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 100\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m coro\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ex\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:1152\u001b[0m, in \u001b[0;36mAsyncGroup.update_attributes\u001b[0;34m(self, new_attributes)\u001b[0m\n\u001b[1;32m 1149\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mattributes\u001b[38;5;241m.\u001b[39mupdate(new_attributes)\n\u001b[1;32m 1151\u001b[0m \u001b[38;5;66;03m# Write new metadata\u001b[39;00m\n\u001b[0;32m-> 1152\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save_metadata()\n\u001b[1;32m 1154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:781\u001b[0m, in \u001b[0;36mAsyncGroup._save_metadata\u001b[0;34m(self, ensure_parents)\u001b[0m\n\u001b[1;32m 771\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m parent \u001b[38;5;129;01min\u001b[39;00m parents:\n\u001b[1;32m 772\u001b[0m awaitables\u001b[38;5;241m.\u001b[39mextend(\n\u001b[1;32m 773\u001b[0m [\n\u001b[1;32m 774\u001b[0m (parent\u001b[38;5;241m.\u001b[39mstore_path \u001b[38;5;241m/\u001b[39m key)\u001b[38;5;241m.\u001b[39mset_if_not_exists(value)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 778\u001b[0m ]\n\u001b[1;32m 779\u001b[0m )\n\u001b[0;32m--> 781\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mawaitables)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/abc/store.py:451\u001b[0m, in \u001b[0;36mset_or_delete\u001b[0;34m(byte_setter, value)\u001b[0m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m byte_setter\u001b[38;5;241m.\u001b[39mdelete()\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m byte_setter\u001b[38;5;241m.\u001b[39mset(value)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/storage/common.py:146\u001b[0m, in \u001b[0;36mStorePath.set\u001b[0;34m(self, value, byte_range)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m byte_range \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStore.set does not have partial writes yet\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 146\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore\u001b[38;5;241m.\u001b[39mset(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath, value)\n", + "File 
\u001b[0;32m~/Developer/icechunk/icechunk-python/python/icechunk/__init__.py:499\u001b[0m, in \u001b[0;36mIcechunkStore.set\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mset\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: \u001b[38;5;28mstr\u001b[39m, value: Buffer) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 492\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Store a (key, value) pair.\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \n\u001b[1;32m 494\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 497\u001b[0m \u001b[38;5;124;03m value : Buffer\u001b[39;00m\n\u001b[1;32m 498\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 499\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store\u001b[38;5;241m.\u001b[39mset(key, value\u001b[38;5;241m.\u001b[39mto_bytes())\n", + "\u001b[0;31mValueError\u001b[0m: store error: cannot write to read-only store" ] } ], @@ -396,17 +360,17 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "d31009db-8f99-48f1-b7bb-3f66875575cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'5QGW2PE1A5MTRZED190G'" + "'EBM53BMSFZY7RBBS2FSG'" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -414,6 +378,7 @@ "source": [ "store.reset()\n", "store.checkout(branch=\"main\")\n", + "store.set_writeable()\n", "root_group[\"root-foo\"].attrs[\"update\"] = \"new attr 2\"\n", "third_commit = store.commit(\"new attr 2\")\n", "third_commit" @@ -421,17 +386,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "03f8d62b-d8a7-452c-b086-340bfcb76d50", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'ARWA72NB2MAH90JJ285G'" + "'7M3VQJ5QAP4G1WJMCATG'" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -452,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "aee87354-4c44-4428-a4bf-d38d99b7e608", "metadata": {}, "outputs": [ @@ -462,7 +427,7 @@ "{'root-foo': dtype('int32')}" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -473,17 +438,17 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "f389f3f9-03d5-4625-9856-145e065785f2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'G1DMNFF0W1RCEEPY09B0'" + "'9QYWSAQHARBVWDQ0264G'" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -514,10 +479,35 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "bc9d1ef4-2c06-4147-ad4d-9e8051ac4ea8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "KeyError", + "evalue": "'/root-foo'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m expected[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mroot-foo\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m 
\u001b[43mcreate_array\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroup\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mroot_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mroot-foo\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1024\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m128\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mint32\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[4], line 26\u001b[0m, in \u001b[0;36mcreate_array\u001b[0;34m(group, name, size, dtype, fill_value)\u001b[0m\n\u001b[1;32m 22\u001b[0m attrs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdescription\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124micechunk test data\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 24\u001b[0m array, chunk_shape \u001b[38;5;241m=\u001b[39m generate_array_chunks(size\u001b[38;5;241m=\u001b[39msize, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m---> 26\u001b[0m \u001b[43mgroup\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_array\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunk_shape\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunk_shape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[43mdimension_names\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[43mattributes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[43mexists_ok\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 38\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m array\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/_compat.py:43\u001b[0m, in \u001b[0;36m_deprecate_positional_args.._inner_deprecate_positional_args..inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m extra_args \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mlen\u001b[39m(all_args)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m extra_args \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# extra_args > 0\u001b[39;00m\n\u001b[1;32m 46\u001b[0m args_msg \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 47\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00marg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, arg \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(kwonly_args[:extra_args], args[\u001b[38;5;241m-\u001b[39mextra_args:], strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 49\u001b[0m ]\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:1758\u001b[0m, in \u001b[0;36mGroup.create_array\u001b[0;34m(self, name, shape, dtype, fill_value, attributes, chunk_shape, chunk_key_encoding, codecs, dimension_names, chunks, dimension_separator, order, filters, compressor, exists_ok, data)\u001b[0m\n\u001b[1;32m 1685\u001b[0m \u001b[38;5;129m@_deprecate_positional_args\u001b[39m\n\u001b[1;32m 1686\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_array\u001b[39m(\n\u001b[1;32m 1687\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1712\u001b[0m data: npt\u001b[38;5;241m.\u001b[39mArrayLike \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1713\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Array:\n\u001b[1;32m 1714\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1715\u001b[0m \u001b[38;5;124;03m Create a zarr array within this AsyncGroup.\u001b[39;00m\n\u001b[1;32m 1716\u001b[0m \u001b[38;5;124;03m This method lightly wraps AsyncArray.create.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1755\u001b[0m \n\u001b[1;32m 1756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 1757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Array(\n\u001b[0;32m-> 1758\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1759\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_async_group\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_array\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1760\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1761\u001b[0m \u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1762\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1763\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1764\u001b[0m \u001b[43m \u001b[49m\u001b[43mattributes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1765\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunk_shape\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunk_shape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1766\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunk_key_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunk_key_encoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1767\u001b[0m \u001b[43m \u001b[49m\u001b[43mcodecs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcodecs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1768\u001b[0m \u001b[43m \u001b[49m\u001b[43mdimension_names\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdimension_names\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1769\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1770\u001b[0m \u001b[43m \u001b[49m\u001b[43mdimension_separator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdimension_separator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1771\u001b[0m \u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1772\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1773\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompressor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompressor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1774\u001b[0m \u001b[43m \u001b[49m\u001b[43mexists_ok\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexists_ok\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1775\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1776\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1777\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1778\u001b[0m )\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:185\u001b[0m, in \u001b[0;36mSyncMixin._sync\u001b[0;34m(self, coroutine)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_sync\u001b[39m(\u001b[38;5;28mself\u001b[39m, coroutine: Coroutine[Any, Any, T]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 183\u001b[0m \u001b[38;5;66;03m# TODO: refactor this to to take *args and **kwargs and pass those to the method\u001b[39;00m\n\u001b[1;32m 184\u001b[0m \u001b[38;5;66;03m# this should allow us to better type the sync wrapper\u001b[39;00m\n\u001b[0;32m--> 185\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcoroutine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43masync.timeout\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:141\u001b[0m, in \u001b[0;36msync\u001b[0;34m(coro, loop, timeout)\u001b[0m\n\u001b[1;32m 138\u001b[0m return_result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(finished))\u001b[38;5;241m.\u001b[39mresult()\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(return_result, \u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[0;32m--> 141\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m return_result\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m return_result\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:100\u001b[0m, in \u001b[0;36m_runner\u001b[0;34m(coro)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;124;03mAwait a coroutine and return the result of running it. If awaiting the coroutine raises an\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124;03mexception, the exception will be returned.\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 100\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m coro\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ex\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:1013\u001b[0m, in \u001b[0;36mAsyncGroup.create_array\u001b[0;34m(self, name, shape, dtype, fill_value, attributes, chunk_shape, chunk_key_encoding, codecs, dimension_names, chunks, dimension_separator, order, filters, compressor, exists_ok, data)\u001b[0m\n\u001b[1;32m 944\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_array\u001b[39m(\n\u001b[1;32m 945\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 946\u001b[0m name: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 970\u001b[0m data: npt\u001b[38;5;241m.\u001b[39mArrayLike \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 971\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m AsyncArray[ArrayV2Metadata] \u001b[38;5;241m|\u001b[39m AsyncArray[ArrayV3Metadata]:\n\u001b[1;32m 972\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 973\u001b[0m \u001b[38;5;124;03m Create a Zarr array within this AsyncGroup.\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;124;03m This method lightly wraps 
AsyncArray.create.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1011\u001b[0m \n\u001b[1;32m 1012\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1013\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m AsyncArray\u001b[38;5;241m.\u001b[39mcreate(\n\u001b[1;32m 1014\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore_path \u001b[38;5;241m/\u001b[39m name,\n\u001b[1;32m 1015\u001b[0m shape\u001b[38;5;241m=\u001b[39mshape,\n\u001b[1;32m 1016\u001b[0m dtype\u001b[38;5;241m=\u001b[39mdtype,\n\u001b[1;32m 1017\u001b[0m chunk_shape\u001b[38;5;241m=\u001b[39mchunk_shape,\n\u001b[1;32m 1018\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[1;32m 1019\u001b[0m chunk_key_encoding\u001b[38;5;241m=\u001b[39mchunk_key_encoding,\n\u001b[1;32m 1020\u001b[0m codecs\u001b[38;5;241m=\u001b[39mcodecs,\n\u001b[1;32m 1021\u001b[0m dimension_names\u001b[38;5;241m=\u001b[39mdimension_names,\n\u001b[1;32m 1022\u001b[0m attributes\u001b[38;5;241m=\u001b[39mattributes,\n\u001b[1;32m 1023\u001b[0m chunks\u001b[38;5;241m=\u001b[39mchunks,\n\u001b[1;32m 1024\u001b[0m dimension_separator\u001b[38;5;241m=\u001b[39mdimension_separator,\n\u001b[1;32m 1025\u001b[0m order\u001b[38;5;241m=\u001b[39morder,\n\u001b[1;32m 1026\u001b[0m filters\u001b[38;5;241m=\u001b[39mfilters,\n\u001b[1;32m 1027\u001b[0m compressor\u001b[38;5;241m=\u001b[39mcompressor,\n\u001b[1;32m 1028\u001b[0m exists_ok\u001b[38;5;241m=\u001b[39mexists_ok,\n\u001b[1;32m 1029\u001b[0m zarr_format\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mzarr_format,\n\u001b[1;32m 1030\u001b[0m data\u001b[38;5;241m=\u001b[39mdata,\n\u001b[1;32m 1031\u001b[0m )\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/array.py:482\u001b[0m, in \u001b[0;36mAsyncArray.create\u001b[0;34m(cls, store, shape, dtype, zarr_format, fill_value, attributes, chunk_shape, chunk_key_encoding, codecs, dimension_names, chunks, dimension_separator, order, filters, compressor, exists_ok, data)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compressor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 479\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 480\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompressor cannot be used for arrays with version 3. 
Use bytes-to-bytes codecs instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 481\u001b[0m )\n\u001b[0;32m--> 482\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_create_v3(\n\u001b[1;32m 483\u001b[0m store_path,\n\u001b[1;32m 484\u001b[0m shape\u001b[38;5;241m=\u001b[39mshape,\n\u001b[1;32m 485\u001b[0m dtype\u001b[38;5;241m=\u001b[39mdtype_parsed,\n\u001b[1;32m 486\u001b[0m chunk_shape\u001b[38;5;241m=\u001b[39m_chunks,\n\u001b[1;32m 487\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[1;32m 488\u001b[0m chunk_key_encoding\u001b[38;5;241m=\u001b[39mchunk_key_encoding,\n\u001b[1;32m 489\u001b[0m codecs\u001b[38;5;241m=\u001b[39mcodecs,\n\u001b[1;32m 490\u001b[0m dimension_names\u001b[38;5;241m=\u001b[39mdimension_names,\n\u001b[1;32m 491\u001b[0m attributes\u001b[38;5;241m=\u001b[39mattributes,\n\u001b[1;32m 492\u001b[0m exists_ok\u001b[38;5;241m=\u001b[39mexists_ok,\n\u001b[1;32m 493\u001b[0m order\u001b[38;5;241m=\u001b[39morder,\n\u001b[1;32m 494\u001b[0m )\n\u001b[1;32m 495\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m zarr_format \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m 496\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtype \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mstr\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m dtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 497\u001b[0m \u001b[38;5;66;03m# another special case: zarr v2 added the vlen-utf8 codec\u001b[39;00m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/array.py:595\u001b[0m, in \u001b[0;36mAsyncArray._create_v3\u001b[0;34m(cls, store_path, shape, dtype, chunk_shape, fill_value, order, chunk_key_encoding, codecs, dimension_names, attributes, exists_ok)\u001b[0m\n\u001b[1;32m 583\u001b[0m metadata \u001b[38;5;241m=\u001b[39m ArrayV3Metadata(\n\u001b[1;32m 584\u001b[0m shape\u001b[38;5;241m=\u001b[39mshape,\n\u001b[1;32m 585\u001b[0m data_type\u001b[38;5;241m=\u001b[39mdtype,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 591\u001b[0m attributes\u001b[38;5;241m=\u001b[39mattributes \u001b[38;5;129;01mor\u001b[39;00m {},\n\u001b[1;32m 592\u001b[0m )\n\u001b[1;32m 594\u001b[0m array \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m(metadata\u001b[38;5;241m=\u001b[39mmetadata, store_path\u001b[38;5;241m=\u001b[39mstore_path, order\u001b[38;5;241m=\u001b[39morder)\n\u001b[0;32m--> 595\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m array\u001b[38;5;241m.\u001b[39m_save_metadata(metadata, ensure_parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 596\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m array\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/array.py:1090\u001b[0m, in \u001b[0;36mAsyncArray._save_metadata\u001b[0;34m(self, metadata, ensure_parents)\u001b[0m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m parent \u001b[38;5;129;01min\u001b[39;00m parents:\n\u001b[1;32m 1081\u001b[0m awaitables\u001b[38;5;241m.\u001b[39mextend(\n\u001b[1;32m 1082\u001b[0m [\n\u001b[1;32m 1083\u001b[0m (parent\u001b[38;5;241m.\u001b[39mstore_path \u001b[38;5;241m/\u001b[39m key)\u001b[38;5;241m.\u001b[39mset_if_not_exists(value)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1087\u001b[0m ]\n\u001b[1;32m 1088\u001b[0m )\n\u001b[0;32m-> 1090\u001b[0m 
\u001b[38;5;28;01mawait\u001b[39;00m gather(\u001b[38;5;241m*\u001b[39mawaitables)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/abc/store.py:451\u001b[0m, in \u001b[0;36mset_or_delete\u001b[0;34m(byte_setter, value)\u001b[0m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m byte_setter\u001b[38;5;241m.\u001b[39mdelete()\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m byte_setter\u001b[38;5;241m.\u001b[39mset(value)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/storage/common.py:146\u001b[0m, in \u001b[0;36mStorePath.set\u001b[0;34m(self, value, byte_range)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m byte_range \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStore.set does not have partial writes yet\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 146\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore\u001b[38;5;241m.\u001b[39mset(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath, value)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/python/icechunk/__init__.py:499\u001b[0m, in \u001b[0;36mIcechunkStore.set\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mset\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: \u001b[38;5;28mstr\u001b[39m, value: Buffer) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 492\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Store a (key, value) pair.\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \n\u001b[1;32m 494\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 497\u001b[0m \u001b[38;5;124;03m value : Buffer\u001b[39;00m\n\u001b[1;32m 498\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 499\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store\u001b[38;5;241m.\u001b[39mset(key, value\u001b[38;5;241m.\u001b[39mto_bytes())\n", + "\u001b[0;31mKeyError\u001b[0m: '/root-foo'" + ] + } + ], "source": [ "expected[\"root-foo\"] = create_array(\n", " group=root_group,\n", @@ -530,17 +520,17 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "4264bbfa-4193-45e9-bc82-932f488bff28", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'RVZSK0518F73E6RSY990'" + "'DR8T4BN1SQ089SWJ6SGG'" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -559,20 +549,18 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "895faf9f-c1ec-4b9b-9676-f6b1745d73de", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(('group2', /group2>),\n", - " ('group1', /group1>),\n", - " ('root-foo',\n", - " /root-foo shape=(4, 4, 64, 128) dtype=int32>))" + "(('group1', /group1>),\n", + " ('group2', /group2>))" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -583,115 
+571,20 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, "id": "14c5afc8-640f-464f-8ee0-b8631e0aacc7", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "store_path group1\n", - "{\n", - " \"shape\": [\n", - " 4,\n", - " 4,\n", - " 64,\n", - " 128\n", - " ],\n", - " \"data_type\": \"float32\",\n", - " \"chunk_grid\": {\n", - " \"name\": \"regular\",\n", - " \"configuration\": {\n", - " \"chunk_shape\": [\n", - " 1,\n", - " 2,\n", - " 8,\n", - " 2\n", - " ]\n", - " }\n", - " },\n", - " \"chunk_key_encoding\": {\n", - " \"name\": \"default\",\n", - " \"configuration\": {\n", - " \"separator\": \"/\"\n", - " }\n", - " },\n", - " \"fill_value\": -1234.0,\n", - " \"codecs\": [\n", - " {\n", - " \"name\": \"bytes\",\n", - " \"configuration\": {\n", - " \"endian\": \"little\"\n", - " }\n", - " }\n", - " ],\n", - " \"dimension_names\": [\n", - " \"x\",\n", - " \"y\",\n", - " \"z\",\n", - " \"t\"\n", - " ],\n", - " \"attributes\": {\n", - " \"description\": \"icechunk test data\"\n", - " }\n", - "}\n", - "{\n", - " \"shape\": [\n", - " 2,\n", - " 2,\n", - " 64,\n", - " 128\n", - " ],\n", - " \"data_type\": \"float16\",\n", - " \"chunk_grid\": {\n", - " \"name\": \"regular\",\n", - " \"configuration\": {\n", - " \"chunk_shape\": [\n", - " 1,\n", - " 1,\n", - " 8,\n", - " 2\n", - " ]\n", - " }\n", - " },\n", - " \"chunk_key_encoding\": {\n", - " \"name\": \"default\",\n", - " \"configuration\": {\n", - " \"separator\": \"/\"\n", - " }\n", - " },\n", - " \"fill_value\": -1234.0,\n", - " \"codecs\": [\n", - " {\n", - " \"name\": \"bytes\",\n", - " \"configuration\": {\n", - " \"endian\": \"little\"\n", - " }\n", - " }\n", - " ],\n", - " \"dimension_names\": [\n", - " \"x\",\n", - " \"y\",\n", - " \"z\",\n", - " \"t\"\n", - " ],\n", - " \"attributes\": {\n", - " \"description\": \"icechunk test data\"\n", - " }\n", - "}\n" - ] - }, { "data": { "text/plain": [ "(('foo1',\n", - " /group1/foo1 shape=(4, 4, 64, 128) dtype=float32>),\n", + " /group1/foo1 shape=(4, 4, 64, 128) dtype=float32>),\n", " ('foo2',\n", - " /group1/foo2 shape=(2, 2, 64, 128) dtype=float16>))" + " /group1/foo2 shape=(2, 2, 64, 128) dtype=float16>))" ] }, - "execution_count": 27, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -710,7 +603,7 @@ "data": { "text/plain": [ "(('foo3',\n", - " /group2/foo3 shape=(2, 2, 64, 128) dtype=int64>),)" + " /group2/foo3 shape=(2, 2, 64, 128) dtype=int64>),)" ] }, "execution_count": 23, @@ -740,20 +633,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "/group2/foo3 shape=(2, 2, 64, 128) dtype=int64>\n", - "/group2/foo3 shape=(4, 2, 64, 128) dtype=int64>\n", - "[ 0 16384]\n" + "/group2/foo3 shape=(2, 2, 64, 128) dtype=int64>\n", + "None\n" ] }, { - "data": { - "text/plain": [ - "'JHCPX1W73WZV399MYQZ0'" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'shape'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[24], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m array \u001b[38;5;241m=\u001b[39m array\u001b[38;5;241m.\u001b[39mresize((array\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m, 
\u001b[38;5;241m*\u001b[39marray\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m:]))\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(array)\n\u001b[0;32m----> 6\u001b[0m array[\u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m :, \u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m] \u001b[38;5;241m=\u001b[39m expected[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgroup2/foo3\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(array[\u001b[38;5;241m2\u001b[39m:, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 8\u001b[0m expected[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgroup2/foo3\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mconcatenate([expected[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgroup2/foo3\u001b[39m\u001b[38;5;124m\"\u001b[39m]] \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n", + "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'shape'" + ] } ], "source": [ diff --git a/icechunk-python/notebooks/demo-s3.ipynb b/icechunk-python/notebooks/demo-s3.ipynb index e69e3bfa..e308d110 100644 --- a/icechunk-python/notebooks/demo-s3.ipynb +++ b/icechunk-python/notebooks/demo-s3.ipynb @@ -48,11 +48,24 @@ "execution_count": 3, "id": "39e76b2a-e294-41a4-a1e4-2a1845eb4f2b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "Error initializing repository: ref error: `storage error `S3ListObjectError(DispatchFailure(DispatchFailure { source: ConnectorError { kind: Other(None), source: ResolveEndpointError { message: \"A region must be set when sending requests to S3.\", source: None }, connection: Unknown } }))``", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m store \u001b[38;5;241m=\u001b[39m \u001b[43mIcechunkStore\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43ms3_storage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/python/icechunk/__init__.py:144\u001b[0m, in \u001b[0;36mIcechunkStore.create\u001b[0;34m(cls, storage, read_only, config, *args, **kwargs)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Create a new IcechunkStore with the given storage configuration.\u001b[39;00m\n\u001b[1;32m 140\u001b[0m \n\u001b[1;32m 141\u001b[0m \u001b[38;5;124;03mIf a store already exists at the given location, an error will be raised.\u001b[39;00m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 143\u001b[0m config \u001b[38;5;241m=\u001b[39m config \u001b[38;5;129;01mor\u001b[39;00m StoreConfig()\n\u001b[0;32m--> 
144\u001b[0m store \u001b[38;5;241m=\u001b[39m \u001b[43mpyicechunk_store_create\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstorage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(store\u001b[38;5;241m=\u001b[39mstore, read_only\u001b[38;5;241m=\u001b[39mread_only, args\u001b[38;5;241m=\u001b[39margs, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n", + "\u001b[0;31mValueError\u001b[0m: Error initializing repository: ref error: `storage error `S3ListObjectError(DispatchFailure(DispatchFailure { source: ConnectorError { kind: Other(None), source: ResolveEndpointError { message: \"A region must be set when sending requests to S3.\", source: None }, connection: Unknown } }))``" + ] + } + ], "source": [ "store = IcechunkStore.create(\n", " storage=s3_storage,\n", - " mode=\"w\",\n", + " read_only=False,\n", ")" ] }, @@ -1157,7 +1170,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "d0bf89c2-dd6e-4024-9083-0cb7c355fda4", "metadata": {}, "outputs": [ @@ -1175,7 +1188,7 @@ "source": [ "store = IcechunkStore.open_existing(\n", " storage=s3_storage,\n", - " mode=\"r\",\n", + " read_only=True,\n", ")\n", "store" ] diff --git a/icechunk-python/notebooks/version-control.ipynb b/icechunk-python/notebooks/version-control.ipynb index d77f323c..c001d688 100644 --- a/icechunk-python/notebooks/version-control.ipynb +++ b/icechunk-python/notebooks/version-control.ipynb @@ -38,7 +38,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -113,7 +113,7 @@ { "data": { "text/plain": [ - "'51MXCR5RTNGPC54Z7WJG'" + "'ZDNDG4GGFHAX4KSHQ000'" ] }, "execution_count": 5, @@ -156,7 +156,7 @@ { "data": { "text/plain": [ - "'45AE3AT46RHZCZ50HWEG'" + "'71B891MSWP6XM013N4NG'" ] }, "execution_count": 7, @@ -187,7 +187,7 @@ { "data": { "text/plain": [ - "'45AE3AT46RHZCZ50HWEG'" + "'71B891MSWP6XM013N4NG'" ] }, "execution_count": 8, @@ -224,7 +224,7 @@ { "data": { "text/plain": [ - "('45AE3AT46RHZCZ50HWEG', {'attr': 'second_attr'})" + "('71B891MSWP6XM013N4NG', {'attr': 'second_attr'})" ] }, "execution_count": 9, @@ -277,14 +277,23 @@ "outputs": [ { "ename": "ValueError", - "evalue": "store error: all commits must be made on a branch", + "evalue": "store error: cannot write to read-only store", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m root_group\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mattr\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwill_fail\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m store\u001b[38;5;241m.\u001b[39mcommit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthis should fail\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/Developer/icechunk/icechunk-python/python/icechunk/__init__.py:261\u001b[0m, in \u001b[0;36mIcechunkStore.commit\u001b[0;34m(self, message)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcommit\u001b[39m(\u001b[38;5;28mself\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 256\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Commit any uncommitted changes to the store.\u001b[39;00m\n\u001b[1;32m 257\u001b[0m \n\u001b[1;32m 258\u001b[0m \u001b[38;5;124;03m This will create a new snapshot on the current branch and return\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;124;03m the snapshot id.\u001b[39;00m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store\u001b[38;5;241m.\u001b[39mcommit(message)\n", - "\u001b[0;31mValueError\u001b[0m: store error: all commits must be made on a branch" + "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mroot_group\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mattr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwill_fail\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2\u001b[0m store\u001b[38;5;241m.\u001b[39mcommit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthis should fail\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/attributes.py:26\u001b[0m, in \u001b[0;36mAttributes.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 24\u001b[0m new_attrs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mattributes)\n\u001b[1;32m 25\u001b[0m new_attrs[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupdate_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnew_attrs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:1594\u001b[0m, in \u001b[0;36mGroup.update_attributes\u001b[0;34m(self, new_attributes)\u001b[0m\n\u001b[1;32m 1593\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mupdate_attributes\u001b[39m(\u001b[38;5;28mself\u001b[39m, new_attributes: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Group:\n\u001b[0;32m-> 1594\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sync\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_async_group\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupdate_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnew_attributes\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1595\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:185\u001b[0m, in \u001b[0;36mSyncMixin._sync\u001b[0;34m(self, coroutine)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21m_sync\u001b[39m(\u001b[38;5;28mself\u001b[39m, coroutine: Coroutine[Any, Any, T]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 183\u001b[0m \u001b[38;5;66;03m# TODO: refactor this to to take *args and **kwargs and pass those to the method\u001b[39;00m\n\u001b[1;32m 184\u001b[0m \u001b[38;5;66;03m# this should allow us to better type the sync wrapper\u001b[39;00m\n\u001b[0;32m--> 185\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoroutine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43masync.timeout\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:141\u001b[0m, in \u001b[0;36msync\u001b[0;34m(coro, loop, timeout)\u001b[0m\n\u001b[1;32m 138\u001b[0m return_result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(finished))\u001b[38;5;241m.\u001b[39mresult()\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(return_result, \u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[0;32m--> 141\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m return_result\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m return_result\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/sync.py:100\u001b[0m, in \u001b[0;36m_runner\u001b[0;34m(coro)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;124;03mAwait a coroutine and return the result of running it. 
If awaiting the coroutine raises an\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124;03mexception, the exception will be returned.\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 100\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m coro\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ex\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:1152\u001b[0m, in \u001b[0;36mAsyncGroup.update_attributes\u001b[0;34m(self, new_attributes)\u001b[0m\n\u001b[1;32m 1149\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mattributes\u001b[38;5;241m.\u001b[39mupdate(new_attributes)\n\u001b[1;32m 1151\u001b[0m \u001b[38;5;66;03m# Write new metadata\u001b[39;00m\n\u001b[0;32m-> 1152\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save_metadata()\n\u001b[1;32m 1154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/core/group.py:781\u001b[0m, in \u001b[0;36mAsyncGroup._save_metadata\u001b[0;34m(self, ensure_parents)\u001b[0m\n\u001b[1;32m 771\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m parent \u001b[38;5;129;01min\u001b[39;00m parents:\n\u001b[1;32m 772\u001b[0m awaitables\u001b[38;5;241m.\u001b[39mextend(\n\u001b[1;32m 773\u001b[0m [\n\u001b[1;32m 774\u001b[0m (parent\u001b[38;5;241m.\u001b[39mstore_path \u001b[38;5;241m/\u001b[39m key)\u001b[38;5;241m.\u001b[39mset_if_not_exists(value)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 778\u001b[0m ]\n\u001b[1;32m 779\u001b[0m )\n\u001b[0;32m--> 781\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mawaitables)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/abc/store.py:451\u001b[0m, in \u001b[0;36mset_or_delete\u001b[0;34m(byte_setter, value)\u001b[0m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m byte_setter\u001b[38;5;241m.\u001b[39mdelete()\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m byte_setter\u001b[38;5;241m.\u001b[39mset(value)\n", + "File \u001b[0;32m~/Developer/icechunk/icechunk-python/.venv/lib/python3.11/site-packages/zarr/storage/common.py:146\u001b[0m, in \u001b[0;36mStorePath.set\u001b[0;34m(self, value, byte_range)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m byte_range \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStore.set does not have partial writes yet\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 146\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore\u001b[38;5;241m.\u001b[39mset(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath, value)\n", + "File 
\u001b[0;32m~/Developer/icechunk/icechunk-python/python/icechunk/__init__.py:499\u001b[0m, in \u001b[0;36mIcechunkStore.set\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mset\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: \u001b[38;5;28mstr\u001b[39m, value: Buffer) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 492\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Store a (key, value) pair.\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \n\u001b[1;32m 494\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 497\u001b[0m \u001b[38;5;124;03m value : Buffer\u001b[39;00m\n\u001b[1;32m 498\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 499\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store\u001b[38;5;241m.\u001b[39mset(key, value\u001b[38;5;241m.\u001b[39mto_bytes())\n", + "\u001b[0;31mValueError\u001b[0m: store error: cannot write to read-only store" ] } ], @@ -339,7 +348,8 @@ "metadata": {}, "outputs": [], "source": [ - "store.reset()" + "store.reset()\n", + "store.set_writeable()" ] }, { @@ -361,7 +371,7 @@ { "data": { "text/plain": [ - "'51MXCR5RTNGPC54Z7WJG'" + "'ZDNDG4GGFHAX4KSHQ000'" ] }, "execution_count": 15, diff --git a/icechunk-python/pyproject.toml b/icechunk-python/pyproject.toml index 49298160..a900ea4f 100644 --- a/icechunk-python/pyproject.toml +++ b/icechunk-python/pyproject.toml @@ -16,11 +16,11 @@ classifiers = [ license = { text = "Apache-2.0" } dynamic = ["version"] -dependencies = ["zarr==3.0.0b1"] +dependencies = ["zarr==3.0.0b2"] [tool.poetry] name = "icechunk" -version = "0.1.0-alpha.4" +version = "0.1.0-alpha.5" description = "Icechunk Python" authors = ["Earthmover "] readme = "README.md" diff --git a/icechunk-python/python/icechunk/__init__.py b/icechunk-python/python/icechunk/__init__.py index 1ee677c4..3049a390 100644 --- a/icechunk-python/python/icechunk/__init__.py +++ b/icechunk-python/python/icechunk/__init__.py @@ -1,10 +1,10 @@ # module -from collections.abc import AsyncGenerator, Iterable +from collections.abc import AsyncGenerator, AsyncIterator, Iterable from typing import Any, Self from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, BufferPrototype -from zarr.core.common import AccessModeLiteral, BytesLike +from zarr.core.common import BytesLike from zarr.core.sync import SyncMixin from ._icechunk_python import ( @@ -45,10 +45,10 @@ async def open(cls, *args: Any, **kwargs: Any) -> Self: @classmethod def open_or_create(cls, *args: Any, **kwargs: Any) -> Self: - if "mode" in kwargs: - mode = kwargs.pop("mode") + if "read_only" in kwargs: + read_only = kwargs.pop("read_only") else: - mode = "r" + read_only = False if "storage" in kwargs: storage = kwargs.pop("storage") @@ -58,38 +58,25 @@ def open_or_create(cls, *args: Any, **kwargs: Any) -> Self: ) store = None - match mode: - case "r" | "r+": - store = cls.open_existing(storage, mode, *args, **kwargs) - case "a": - if pyicechunk_store_exists(storage): - store = cls.open_existing(storage, mode, *args, **kwargs) - else: - store = cls.create(storage, mode, *args, **kwargs) - case "w": - if pyicechunk_store_exists(storage): - store = cls.open_existing(storage, mode, *args, **kwargs) - store.sync_clear() - else: - store = cls.create(storage, mode, 
*args, **kwargs) - case "w-": - if pyicechunk_store_exists(storage): - raise ValueError("""Zarr store already exists, open using mode "w" or "r+""""") - else: - store = cls.create(storage, mode, *args, **kwargs) - - assert(store) + if read_only: + store = cls.open_existing(storage, read_only, *args, **kwargs) + else: + if pyicechunk_store_exists(storage): + store = cls.open_existing(storage, read_only, *args, **kwargs) + else: + store = cls.create(storage, read_only, *args, **kwargs) + + assert store # We dont want to call _open() because icechunk handles the opening, etc. # if we have gotten this far we can mark it as open store._is_open = True return store - def __init__( self, store: PyIcechunkStore, - mode: AccessModeLiteral = "r", + read_only: bool = False, *args: Any, **kwargs: Any, ): @@ -97,7 +84,7 @@ def __init__( This should not be called directly, instead use the `create`, `open_existing` or `open_or_create` class methods. """ - super().__init__(*args, mode=mode, **kwargs) + super().__init__(read_only=read_only) if store is None: raise ValueError( "An IcechunkStore should not be created with the default constructor, instead use either the create or open_existing class methods." @@ -108,7 +95,7 @@ def __init__( def open_existing( cls, storage: StorageConfig, - mode: AccessModeLiteral = "r", + read_only: bool = False, config: StoreConfig | None = None, *args: Any, **kwargs: Any, @@ -120,11 +107,8 @@ def open_existing( It is recommended to use the cached storage option for better performance. If cached=True, this will be configured automatically with the provided storage_config as the underlying storage backend. - - If opened with AccessModeLiteral "r", the store will be read-only. Otherwise the store will be writable. """ config = config or StoreConfig() - read_only = mode == "r" # We have delayed checking if the repository exists, to avoid the delay in the happy case # So we need to check now if open fails, to provide a nice error message try: @@ -138,14 +122,16 @@ def open_existing( raise e else: # if the repo doesn't exists, we want to point users to that issue instead - raise ValueError("No Icechunk repository at the provided location, try opening in create mode or changing the location") from None - return cls(store=store, mode=mode, args=args, kwargs=kwargs) + raise ValueError( + "No Icechunk repository at the provided location, try opening in create mode or changing the location" + ) from None + return cls(store=store, read_only=read_only, args=args, kwargs=kwargs) @classmethod def create( cls, storage: StorageConfig, - mode: AccessModeLiteral = "w", + read_only: bool = False, config: StoreConfig | None = None, *args: Any, **kwargs: Any, @@ -156,47 +142,7 @@ def create( """ config = config or StoreConfig() store = pyicechunk_store_create(storage, config=config) - return cls(store=store, mode=mode, args=args, kwargs=kwargs) - - def set_mode(self, mode: AccessModeLiteral) -> None: - """ - Set the mode on this Store. - - Parameters - ---------- - mode: AccessModeLiteral - The new mode to use. - - Returns - ------- - None - - """ - read_only = mode == "r" - self._store.set_mode(read_only) - - - def with_mode(self, mode: AccessModeLiteral) -> Self: - """ - Return a new store of the same type pointing to the same location with a new mode. - - The returned Store is not automatically opened. Call :meth:`Store.open` before - using. - - Parameters - ---------- - mode: AccessModeLiteral - The new mode to use. 
- - Returns - ------- - store: - A new store of the same type with the new mode. - - """ - read_only = mode == "r" - new_store = self._store.with_mode(read_only) - return self.__class__(new_store, mode=mode) + return cls(store=store, read_only=read_only, args=args, kwargs=kwargs) def __eq__(self, value: object) -> bool: if not isinstance(value, self.__class__): @@ -211,12 +157,31 @@ def __getstate__(self) -> object: def __setstate__(self, state: Any) -> None: # we have to deserialize the bytes of the Rust store - mode = state["_mode"] - is_read_only = mode.readonly + read_only = state["_read_only"] store_repr = state["_store"] - state["_store"] = pyicechunk_store_from_bytes(store_repr, is_read_only) + state["_store"] = pyicechunk_store_from_bytes(store_repr, read_only) self.__dict__ = state + def as_read_only(self) -> Self: + """Return a read-only version of this store.""" + new_store = self._store.with_read_only(read_only=True) + return self.__class__(store=new_store, read_only=True) + + def as_writeable(self) -> Self: + """Return a writeable version of this store.""" + new_store = self._store.with_read_only(read_only=False) + return self.__class__(store=new_store, read_only=False) + + def set_read_only(self) -> None: + """Set the store to read-only mode.""" + self._store.set_read_only(read_only=True) + self._read_only = True + + def set_writeable(self) -> None: + """Set the store to writeable mode.""" + self._store.set_read_only(read_only=False) + self._read_only = False + @property def snapshot_id(self) -> str: """Return the current snapshot id.""" @@ -260,15 +225,21 @@ def checkout( raise ValueError( "only one of snapshot_id, branch, or tag may be specified" ) - return self._store.checkout_snapshot(snapshot_id) + self._store.checkout_snapshot(snapshot_id) + self._read_only = True + return if branch is not None: if tag is not None: raise ValueError( "only one of snapshot_id, branch, or tag may be specified" ) - return self._store.checkout_branch(branch) + self._store.checkout_branch(branch) + self._read_only = True + return if tag is not None: - return self._store.checkout_tag(tag) + self._store.checkout_tag(tag) + self._read_only = True + return raise ValueError("a snapshot_id, branch, or tag must be specified") @@ -289,15 +260,21 @@ async def async_checkout( raise ValueError( "only one of snapshot_id, branch, or tag may be specified" ) - return await self._store.async_checkout_snapshot(snapshot_id) + await self._store.async_checkout_snapshot(snapshot_id) + self._read_only = True + return if branch is not None: if tag is not None: raise ValueError( "only one of snapshot_id, branch, or tag may be specified" ) - return await self._store.async_checkout_branch(branch) + await self._store.async_checkout_branch(branch) + self._read_only = True + return if tag is not None: - return await self._store.async_checkout_tag(tag) + await self._store.async_checkout_tag(tag) + self._read_only = True + return raise ValueError("a snapshot_id, branch, or tag must be specified") @@ -326,7 +303,7 @@ async def async_commit(self, message: str) -> str: * some other writer updated the current branch since the repository was checked out """ return await self._store.async_commit(message) - + def merge(self, changes: bytes) -> None: """Merge the changes from another store into this store. @@ -341,7 +318,7 @@ def merge(self, changes: bytes) -> None: The behavior is undefined if the stores applied conflicting changes. 
""" return self._store.merge(changes) - + async def async_merge(self, changes: bytes) -> None: """Merge the changes from another store into this store. @@ -364,7 +341,7 @@ def has_uncommitted_changes(self) -> bool: async def async_reset(self) -> bytes: """Pop any uncommitted changes and reset to the previous snapshot state. - + Returns ------- bytes : The changes that were taken from the working set @@ -373,7 +350,7 @@ async def async_reset(self) -> bytes: def reset(self) -> bytes: """Pop any uncommitted changes and reset to the previous snapshot state. - + Returns ------- bytes : The changes that were taken from the working set @@ -431,8 +408,7 @@ async def async_tag(self, tag_name: str, snapshot_id: str) -> None: return await self._store.async_tag(tag_name, snapshot_id=snapshot_id) def ancestry(self) -> list[SnapshotMetadata]: - """Get the list of parents of the current version. - """ + """Get the list of parents of the current version.""" return self._store.ancestry() def async_ancestry(self) -> AsyncGenerator[SnapshotMetadata, None]: @@ -444,10 +420,6 @@ def async_ancestry(self) -> AsyncGenerator[SnapshotMetadata, None]: """ return self._store.async_ancestry() - async def empty(self) -> bool: - """Check if the store is empty.""" - return await self._store.empty() - async def clear(self) -> None: """Clear the store. @@ -463,6 +435,22 @@ def sync_clear(self) -> None: including all groups and all arrays. But it will not modify the repository history. """ return self._store.sync_clear() + + async def is_empty(self, prefix: str) -> bool: + """ + Check if the directory is empty. + + Parameters + ---------- + prefix : str + Prefix of keys to check. + + Returns + ------- + bool + True if the store is empty, False otherwise. + """ + return await self._store.is_empty(prefix) async def get( self, @@ -626,20 +614,25 @@ def supports_listing(self) -> bool: def supports_deletes(self) -> bool: return self._store.supports_deletes - def list(self) -> AsyncGenerator[str, None]: + def list(self) -> AsyncIterator[str]: """Retrieve all keys in the store. Returns ------- - AsyncGenerator[str, None] + AsyncIterator[str, None] """ + # This method should be async, like overridden methods in child classes. + # However, that's not straightforward: + # https://stackoverflow.com/questions/68905848 + # The zarr spec specefies that that this and other # listing methods should not be async, so we need to # wrap the async method in a sync method. return self._store.list() - def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """Retrieve all keys in the store with a given prefix. + def list_prefix(self, prefix: str) -> AsyncIterator[str]: + """Retrieve all keys in the store that begin with a given prefix. Keys are returned relative + to the root of the store. Parameters ---------- @@ -647,14 +640,14 @@ def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: Returns ------- - AsyncGenerator[str, None] + AsyncIterator[str, None] """ # The zarr spec specefies that that this and other # listing methods should not be async, so we need to # wrap the async method in a sync method. return self._store.list_prefix(prefix) - def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + def list_dir(self, prefix: str) -> AsyncIterator[str]: """ Retrieve all keys and prefixes with a given prefix and which do not contain the character “/” after the given prefix. 
@@ -665,7 +658,7 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: Returns ------- - AsyncGenerator[str, None] + AsyncIterator[str, None] """ # The zarr spec specefies that that this and other # listing methods should not be async, so we need to diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index 74fd42a0..baec311e 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -5,8 +5,8 @@ from typing import Any class PyIcechunkStore: def as_bytes(self) -> bytes: ... - def set_mode(self, read_only: bool) -> None: ... - def with_mode(self, read_only: bool) -> PyIcechunkStore: ... + def set_read_only(self, read_only: bool) -> None: ... + def with_read_only(self, read_only: bool) -> PyIcechunkStore: ... @property def snapshot_id(self) -> str: ... def change_set_bytes(self) -> bytes: ... @@ -34,7 +34,7 @@ class PyIcechunkStore: async def async_tag(self, tag: str, snapshot_id: str) -> None: ... def ancestry(self) -> list[SnapshotMetadata]: ... def async_ancestry(self) -> PyAsyncSnapshotGenerator: ... - async def empty(self) -> bool: ... + async def is_empty(self, prefix: str) -> bool: ... async def clear(self) -> None: ... def sync_clear(self) -> None: ... async def get( diff --git a/icechunk-python/src/errors.rs b/icechunk-python/src/errors.rs index b230eadd..4a1434f2 100644 --- a/icechunk-python/src/errors.rs +++ b/icechunk-python/src/errors.rs @@ -17,9 +17,9 @@ use thiserror::Error; #[allow(dead_code)] pub(crate) enum PyIcechunkStoreError { #[error("store error: {0}")] - StoreError(#[from] StoreError), + StoreError(StoreError), #[error("repository error: {0}")] - RepositoryError(#[from] RepositoryError), + RepositoryError(RepositoryError), #[error("icechunk format error: {0}")] IcechunkFormatError(#[from] IcechunkFormatError), #[error("{0}")] @@ -32,6 +32,30 @@ pub(crate) enum PyIcechunkStoreError { UnkownError(String), } +impl From for PyIcechunkStoreError { + fn from(error: StoreError) -> Self { + match error { + StoreError::NotFound(e) => PyIcechunkStoreError::PyKeyError(e.to_string()), + StoreError::RepositoryError(RepositoryError::NodeNotFound { + path, + message: _, + }) => PyIcechunkStoreError::PyKeyError(format!("{}", path)), + _ => PyIcechunkStoreError::StoreError(error), + } + } +} + +impl From for PyIcechunkStoreError { + fn from(error: RepositoryError) -> Self { + match error { + RepositoryError::NodeNotFound { path, message: _ } => { + PyIcechunkStoreError::PyKeyError(format!("{}", path)) + } + _ => PyIcechunkStoreError::RepositoryError(error), + } + } +} + impl From for PyErr { fn from(error: PyIcechunkStoreError) -> Self { match error { diff --git a/icechunk-python/src/lib.rs b/icechunk-python/src/lib.rs index 5c77e5a0..97c010d9 100644 --- a/icechunk-python/src/lib.rs +++ b/icechunk-python/src/lib.rs @@ -333,7 +333,7 @@ impl PyIcechunkStore { Ok(Cow::Owned(serialized)) } - fn set_mode(&self, read_only: bool) -> PyResult<()> { + fn set_read_only(&self, read_only: bool) -> PyResult<()> { let access_mode = if read_only { icechunk::zarr::AccessMode::ReadOnly } else { @@ -345,7 +345,7 @@ impl PyIcechunkStore { Ok(()) } - fn with_mode(&self, read_only: bool) -> PyResult { + fn with_read_only(&self, read_only: bool) -> PyResult { let access_mode = if read_only { icechunk::zarr::AccessMode::ReadOnly } else { @@ -628,11 +628,19 @@ impl PyIcechunkStore { Ok(PyAsyncGenerator::new(prepared_list)) } - fn empty<'py>(&'py self, py: 
Python<'py>) -> PyResult> { + fn is_empty<'py>( + &'py self, + py: Python<'py>, + prefix: String, + ) -> PyResult> { let store = Arc::clone(&self.store); pyo3_async_runtimes::tokio::future_into_py(py, async move { - let is_empty = - store.read().await.empty().await.map_err(PyIcechunkStoreError::from)?; + let is_empty = store + .read() + .await + .is_empty(&prefix) + .await + .map_err(PyIcechunkStoreError::from)?; Ok(is_empty) }) } @@ -727,7 +735,8 @@ impl PyIcechunkStore { .await .exists(&key) .await - .map_err(PyIcechunkStoreError::StoreError)?; + .map_err(PyIcechunkStoreError::from)?; + Ok(exists) }) } @@ -823,12 +832,7 @@ impl PyIcechunkStore { let store = Arc::clone(&self.store); pyo3_async_runtimes::tokio::future_into_py(py, async move { - store - .read() - .await - .delete(&key) - .await - .map_err(PyIcechunkStoreError::StoreError)?; + store.read().await.delete(&key).await.map_err(PyIcechunkStoreError::from)?; Ok(()) }) } @@ -940,7 +944,7 @@ async fn do_checkout_snapshot( store .checkout(VersionInfo::SnapshotId(snapshot_id)) .await - .map_err(PyIcechunkStoreError::StoreError)?; + .map_err(PyIcechunkStoreError::from)?; Ok(()) } @@ -949,16 +953,13 @@ async fn do_checkout_branch(store: Arc>, branch: String) -> PyResu store .checkout(VersionInfo::BranchTipRef(branch)) .await - .map_err(PyIcechunkStoreError::StoreError)?; + .map_err(PyIcechunkStoreError::from)?; Ok(()) } async fn do_checkout_tag(store: Arc>, tag: String) -> PyResult<()> { let mut store = store.write().await; - store - .checkout(VersionInfo::TagRef(tag)) - .await - .map_err(PyIcechunkStoreError::StoreError)?; + store.checkout(VersionInfo::TagRef(tag)).await.map_err(PyIcechunkStoreError::from)?; Ok(()) } @@ -967,7 +968,7 @@ async fn do_merge( other_change_set_bytes: Vec, ) -> PyResult<()> { let change_set = ChangeSet::import_from_bytes(&other_change_set_bytes) - .map_err(PyIcechunkStoreError::RepositoryError)?; + .map_err(PyIcechunkStoreError::from)?; let store = store.write().await; store.merge(change_set).await; @@ -978,7 +979,7 @@ async fn do_reset<'py>(store: Arc>) -> PyResult> { let changes = store.write().await.reset().await.map_err(PyIcechunkStoreError::StoreError)?; let serialized_changes = - changes.export_to_bytes().map_err(PyIcechunkStoreError::RepositoryError)?; + changes.export_to_bytes().map_err(PyIcechunkStoreError::from)?; Ok(serialized_changes) } diff --git a/icechunk-python/tests/test_can_read_old.py b/icechunk-python/tests/test_can_read_old.py index 5b140ec4..f097a968 100644 --- a/icechunk-python/tests/test_can_read_old.py +++ b/icechunk-python/tests/test_can_read_old.py @@ -36,7 +36,7 @@ def write_chunks_to_minio(chunks: list[tuple[str, bytes]]): store.put(key, data) -def mk_store(mode): +def mk_store(read_only: bool): """Create a store that can access virtual chunks in localhost MinIO""" store_path = "./tests/data/test-repo" store = ic.IcechunkStore.open_or_create( @@ -53,7 +53,7 @@ def mk_store(mode): region="us-east-1", ), ), - mode=mode, + read_only=read_only, ) return store @@ -69,7 +69,7 @@ async def write_a_test_repo(): """ print("Writing repository to ./tests/data/test-repo") - store = mk_store("w") + store = mk_store(read_only=False) root = zarr.group(store=store) group1 = root.create_group( @@ -142,7 +142,7 @@ async def write_a_test_repo(): async def test_icechunk_can_read_old_repo(): - store = mk_store("r") + store = mk_store(read_only=True) expected_main_history = [ "set virtual chunk", @@ -165,7 +165,7 @@ async def test_icechunk_can_read_old_repo(): store.checkout(tag="it works!") assert 
[p.message for p in store.ancestry()] == expected_branch_history[1:] - store = mk_store("r") + store = mk_store(read_only=False) store.checkout(branch="my-branch") assert sorted([p async for p in store.list_dir("")]) == [ "group1", @@ -194,7 +194,7 @@ async def test_icechunk_can_read_old_repo(): [p async for p in store.list_dir("group2/group3/group4/group5/inner")] ) == ["c", "zarr.json"] - root = zarr.group(store=store) + root = zarr.group(store=store.as_writeable()) # inner is not initialized, so it's all fill values inner = root["group2/group3/group4/group5/inner"] assert_array_equal(inner[:], float("nan")) diff --git a/icechunk-python/tests/test_concurrency.py b/icechunk-python/tests/test_concurrency.py index 95504fbf..f833b8a0 100644 --- a/icechunk-python/tests/test_concurrency.py +++ b/icechunk-python/tests/test_concurrency.py @@ -40,7 +40,7 @@ async def list_store(store, barrier): async def test_concurrency(): store = icechunk.IcechunkStore.open_or_create( - mode="w", + read_only=False, storage=icechunk.StorageConfig.memory(prefix="concurrency"), ) diff --git a/icechunk-python/tests/test_config.py b/icechunk-python/tests/test_config.py index 5c4bfe93..d857871a 100644 --- a/icechunk-python/tests/test_config.py +++ b/icechunk-python/tests/test_config.py @@ -10,7 +10,7 @@ async def tmp_store(tmpdir): store_path = f"{tmpdir}" store = icechunk.IcechunkStore.open_or_create( storage=icechunk.StorageConfig.filesystem(store_path), - mode="a", + read_only=False, config=icechunk.StoreConfig(inline_chunk_threshold_bytes=5), ) diff --git a/icechunk-python/tests/test_distributed_writers.py b/icechunk-python/tests/test_distributed_writers.py index f0b73c6e..e46316b3 100644 --- a/icechunk-python/tests/test_distributed_writers.py +++ b/icechunk-python/tests/test_distributed_writers.py @@ -22,7 +22,7 @@ def mk_store( - mode: str, storage_config: dict[str, Any], store_config: dict[str, Any] + read_only: bool, storage_config: dict[str, Any], store_config: dict[str, Any] ) -> IcechunkStore: storage_config = icechunk.StorageConfig.s3_from_config( **storage_config, @@ -35,7 +35,7 @@ def mk_store( store = icechunk.IcechunkStore.open_or_create( storage=storage_config, - mode="a", + read_only=read_only, config=store_config, ) @@ -61,7 +61,7 @@ async def test_distributed_writers(): "allow_http": True, } store_config = {"inline_chunk_threshold_bytes": 5} - store = mk_store("r+", storage_config=storage_config, store_config=store_config) + store = mk_store(read_only=False, storage_config=storage_config, store_config=store_config) shape = (CHUNKS_PER_DIM * CHUNK_DIM_SIZE,) * 2 dask_chunks = (CHUNK_DIM_SIZE * CHUNKS_PER_TASK,) * 2 @@ -82,13 +82,13 @@ async def test_distributed_writers(): assert commit_res # Lets open a new store to verify the results - store = mk_store("r", storage_config=storage_config, store_config=store_config) + store = mk_store(read_only=True, storage_config=storage_config, store_config=store_config) all_keys = [key async for key in store.list_prefix("/")] assert ( len(all_keys) == 1 + 1 + CHUNKS_PER_DIM * CHUNKS_PER_DIM ) # group meta + array meta + each chunk - group = zarr.group(store=store, overwrite=False) + group = zarr.open_group(store=store, mode="r") roundtripped = dask.array.from_array(group["array"], chunks=dask_chunks) with warnings.catch_warnings(): diff --git a/icechunk-python/tests/test_pickle.py b/icechunk-python/tests/test_pickle.py index 40cddefc..87c8dd66 100644 --- a/icechunk-python/tests/test_pickle.py +++ b/icechunk-python/tests/test_pickle.py @@ -11,7 +11,7 @@ async def 
tmp_store(tmpdir): store_path = f"{tmpdir}" store = icechunk.IcechunkStore.open_or_create( storage=icechunk.StorageConfig.filesystem(store_path), - mode="w", + read_only=False, ) yield store @@ -41,23 +41,23 @@ async def test_pickle(tmp_store): async def test_store_equality(tmpdir, tmp_store): assert tmp_store == tmp_store - local_store = await LocalStore.open(f"{tmpdir}/zarr", mode="w") + local_store = await LocalStore.open(f"{tmpdir}/zarr", read_only=False) assert tmp_store != local_store store2 = icechunk.IcechunkStore.open_or_create( storage=icechunk.StorageConfig.memory(prefix="test"), - mode="w", + read_only=False, ) assert tmp_store != store2 store3 = icechunk.IcechunkStore.open_or_create( storage=icechunk.StorageConfig.filesystem(f"{tmpdir}/test"), - mode="a", + read_only=False, ) assert tmp_store != store3 store4 = icechunk.IcechunkStore.open_or_create( storage=icechunk.StorageConfig.filesystem(f"{tmpdir}/test"), - mode="a", + read_only=False, ) assert store3 == store4 diff --git a/icechunk-python/tests/test_timetravel.py b/icechunk-python/tests/test_timetravel.py index 4a13a643..e91e8037 100644 --- a/icechunk-python/tests/test_timetravel.py +++ b/icechunk-python/tests/test_timetravel.py @@ -8,6 +8,7 @@ def test_timetravel(): store = icechunk.IcechunkStore.create( storage=icechunk.StorageConfig.memory("test"), config=icechunk.StoreConfig(inline_chunk_threshold_bytes=1), + read_only=False, ) group = zarr.group(store=store, overwrite=True) @@ -26,13 +27,18 @@ def test_timetravel(): new_snapshot_id = store.commit("commit 2") store.checkout(snapshot_id=snapshot_id) + assert store.read_only assert air_temp[200, 6] == 42 store.checkout(snapshot_id=new_snapshot_id) + assert store.read_only assert air_temp[200, 6] == 54 store.checkout(branch="main") - store.set_mode("w-") + + store.set_writeable() + assert not store.read_only + air_temp[:, :] = 76 assert store.has_uncommitted_changes assert store.branch == "main" diff --git a/icechunk-python/tests/test_virtual_ref.py b/icechunk-python/tests/test_virtual_ref.py index 94dee971..d320b3f8 100644 --- a/icechunk-python/tests/test_virtual_ref.py +++ b/icechunk-python/tests/test_virtual_ref.py @@ -98,7 +98,7 @@ async def test_from_s3_public_virtual_refs(tmpdir): # Open the store, store = IcechunkStore.open_or_create( storage=StorageConfig.filesystem(f'{tmpdir}/virtual'), - mode="w", + read_only=False, config=StoreConfig( virtual_ref_config=VirtualRefConfig.s3_anonymous(region="us-east-1", allow_http=False) ), @@ -122,6 +122,3 @@ async def test_from_s3_public_virtual_refs(tmpdir): assert len(depth_values) == 10 actual_values = np.array([-0.95,-0.85,-0.75,-0.65,-0.55,-0.45,-0.35,-0.25,-0.15,-0.05]) assert np.allclose(depth_values, actual_values) - - - diff --git a/icechunk-python/tests/test_zarr/test_api.py b/icechunk-python/tests/test_zarr/test_api.py index 5baf8fd7..ce1fb5e9 100644 --- a/icechunk-python/tests/test_zarr/test_api.py +++ b/icechunk-python/tests/test_zarr/test_api.py @@ -17,6 +17,7 @@ save_array, save_group, ) +from zarr.core.common import MemoryOrder, ZarrFormat from zarr.storage._utils import normalize_path from ..conftest import parse_store @@ -122,6 +123,33 @@ async def test_open_group(memory_store: IcechunkStore) -> None: # assert g.read_only +@pytest.mark.parametrize("n_args", [10, 1, 0]) +@pytest.mark.parametrize("n_kwargs", [10, 1, 0]) +def test_save(memory_store: IcechunkStore, n_args: int, n_kwargs: int) -> None: + store = memory_store + data = np.arange(10) + args = [np.arange(10) for _ in range(n_args)] + kwargs = 
{f"arg_{i}": data for i in range(n_kwargs)} + + if n_kwargs == 0 and n_args == 0: + with pytest.raises(ValueError): + save(store) + elif n_args == 1 and n_kwargs == 0: + save(store, *args) + array = open(store) + assert isinstance(array, Array) + assert_array_equal(array[:], data) + else: + save(store, *args, **kwargs) # type: ignore[arg-type] + group = open(store) + assert isinstance(group, Group) + for array in group.array_values(): + assert_array_equal(array[:], data) + for k in kwargs: + assert k in group + assert group.nmembers() == n_args + n_kwargs + + def test_save_errors() -> None: with pytest.raises(ValueError): # no arrays provided @@ -132,6 +160,10 @@ def test_save_errors() -> None: with pytest.raises(ValueError): # no arrays provided save("data/group.zarr") + with pytest.raises(TypeError): + # mode is no valid argument and would get handled as an array + a = np.arange(10) + zarr.save("data/example.zarr", a, mode="w") def test_open_with_mode_r(tmp_path: pathlib.Path) -> None: @@ -212,6 +244,22 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: # assert "LazyLoader: " in repr(loader) +@pytest.mark.parametrize("order", ["C", "F", None]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_array_order(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None: + arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) + expected = order or zarr.config.get("array.order") + assert arr.order == expected + + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError + + def test_load_array(memory_store: Store) -> None: store = memory_store foo = np.arange(100) diff --git a/icechunk-python/tests/test_zarr/test_array.py b/icechunk-python/tests/test_zarr/test_array.py index 704fcf8e..8b362460 100644 --- a/icechunk-python/tests/test_zarr/test_array.py +++ b/icechunk-python/tests/test_zarr/test_array.py @@ -1,3 +1,5 @@ +import json +import math import pickle from itertools import accumulate from typing import Any, Literal @@ -11,7 +13,8 @@ from zarr import Array, AsyncArray, AsyncGroup, Group from zarr.codecs import BytesCodec, VLenBytesCodec from zarr.core.array import chunks_initialized -from zarr.core.common import JSON, ZarrFormat +from zarr.core.buffer import default_buffer_prototype +from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.indexing import ceildiv from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError @@ -261,7 +264,9 @@ def test_storage_transformers(store: IcechunkStore) -> None: "fill_value": 0, "storage_transformers": ({"test": "should_raise"}), } - match = "Arrays with storage transformers are not supported in zarr-python at this time." + match = ( + "Arrays with storage transformers are not supported in zarr-python at this time." 
+ ) with pytest.raises(ValueError, match=match): Array.from_dict(StorePath(store), data=metadata_dict) @@ -269,12 +274,16 @@ def test_storage_transformers(store: IcechunkStore) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) @pytest.mark.parametrize("nchunks", [2, 5, 10]) -def test_nchunks(store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int) -> None: +def test_nchunks( + store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int +) -> None: """ Test that nchunks returns the number of chunks defined for the array. """ shape = 100 - arr = Array.create(store, shape=(shape,), chunks=(ceildiv(shape, nchunks),), dtype="i4") + arr = Array.create( + store, shape=(shape,), chunks=(ceildiv(shape, nchunks),), dtype="i4" + ) expected = nchunks if test_cls == Array: observed = arr.nchunks @@ -285,7 +294,9 @@ def test_nchunks(store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[A @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) -def test_nchunks_initialized(store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[Any]]) -> None: +async def test_nchunks_initialized( + store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[Any]] +) -> None: """ Test that nchunks_initialized accurately returns the number of stored chunks. """ @@ -298,7 +309,7 @@ def test_nchunks_initialized(store: IcechunkStore, test_cls: type[Array] | type[ if test_cls == Array: observed = arr.nchunks_initialized else: - observed = arr._async_array.nchunks_initialized + observed = await arr._async_array.nchunks_initialized() assert observed == expected # delete chunks @@ -307,14 +318,13 @@ def test_nchunks_initialized(store: IcechunkStore, test_cls: type[Array] | type[ if test_cls == Array: observed = arr.nchunks_initialized else: - observed = arr._async_array.nchunks_initialized + observed = await arr._async_array.nchunks_initialized() expected = arr.nchunks - idx - 1 assert observed == expected @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) -def test_chunks_initialized(store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[Any]]) -> None: +async def test_chunks_initialized(store: IcechunkStore) -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks. 
""" @@ -326,10 +336,7 @@ def test_chunks_initialized(store: IcechunkStore, test_cls: type[Array] | type[A for keys, region in zip(chunks_accumulated, arr._iter_chunk_regions(), strict=False): arr[region] = 1 - if test_cls == Array: - observed = sorted(chunks_initialized(arr)) - else: - observed = sorted(chunks_initialized(arr._async_array)) + observed = sorted(await chunks_initialized(arr._async_array)) expected = sorted(keys) assert observed == expected @@ -361,9 +368,7 @@ def test_vlen_errors(store: IcechunkStore) -> None: ValueError, match="For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `BytesCodec`.", ): - Array.create( - store, shape=5, chunk_shape=5, dtype=" None: @pytest.mark.parametrize("zarr_format", [3]) def test_update_attrs(store: IcechunkStore, zarr_format: int) -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2328 - arr = Array.create(store=store, shape=5, chunk_shape=5, dtype="f8", zarr_format=zarr_format) + arr = Array.create( + store=store, shape=5, chunk_shape=5, dtype="f8", zarr_format=zarr_format + ) arr.attrs["foo"] = "bar" assert arr.attrs["foo"] == "bar" arr2 = zarr.open_array(store=store, zarr_format=zarr_format) - assert arr2.attrs["foo"] == "bar" \ No newline at end of file + assert arr2.attrs["foo"] == "bar" + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [3]) +def test_resize_1d(store: IcechunkStore, zarr_format: int) -> None: + z = zarr.create( + shape=105, + chunks=10, + dtype="i4", + fill_value=0, + store=store, + zarr_format=zarr_format, + ) + a = np.arange(105, dtype="i4") + z[:] = a + assert (105,) == z.shape + assert (105,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + z.resize(205) + assert (205,) == z.shape + assert (205,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:105]) + np.testing.assert_array_equal(np.zeros(100, dtype="i4"), z[105:]) + + z.resize(55) + assert (55,) == z.shape + assert (55,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a[:55], z[:]) + + # via shape setter + new_shape = (105,) + z.shape = new_shape + assert new_shape == z.shape + assert new_shape == z[:].shape + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [3]) +def test_resize_2d(store: IcechunkStore, zarr_format: int) -> None: + z = zarr.create( + shape=(105, 105), + chunks=(10, 10), + dtype="i4", + fill_value=0, + store=store, + zarr_format=zarr_format, + ) + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z[:] = a + assert (105, 105) == z.shape + assert (105, 105) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + z.resize((205, 205)) + assert (205, 205) == z.shape + assert (205, 205) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:105, :105]) + np.testing.assert_array_equal(np.zeros((100, 205), dtype="i4"), z[105:, :]) + np.testing.assert_array_equal(np.zeros((205, 100), dtype="i4"), z[:, 105:]) + + z.resize((55, 55)) + assert (55, 55) == z.shape + assert (55, 55) == 
z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:55, :55], z[:]) + + z.resize((55, 1)) + assert (55, 1) == z.shape + assert (55, 1) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:55, :1], z[:]) + + z.resize((1, 55)) + assert (1, 55) == z.shape + assert (1, 55) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:1, :10], z[:, :10]) + np.testing.assert_array_equal(np.zeros((1, 55 - 10), dtype="i4"), z[:, 10:55]) + + # via shape setter + new_shape = (105, 105) + z.shape = new_shape + assert new_shape == z.shape + assert new_shape == z[:].shape + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [3]) +def test_append_1d(store: IcechunkStore, zarr_format: int) -> None: + a = np.arange(105) + z = zarr.create( + shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + b = np.arange(105, 205) + e = np.append(a, b) + assert z.shape == (105,) + z.append(b) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(e, z[:]) + + # check append handles array-like + c = [1, 2, 3] + f = np.append(e, c) + z.append(c) + assert f.shape == z.shape + assert f.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(f, z[:]) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [3]) +def test_append_2d(store: IcechunkStore, zarr_format: int) -> None: + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z = zarr.create( + shape=a.shape, + chunks=(10, 10), + dtype=a.dtype, + store=store, + zarr_format=zarr_format, + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10, 10) == z.chunks + actual = z[:] + np.testing.assert_array_equal(a, actual) + + b = np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) + e = np.append(a, b, axis=0) + z.append(b) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10, 10) == z.chunks + actual = z[:] + np.testing.assert_array_equal(e, actual) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [3]) +def test_append_2d_axis(store: IcechunkStore, zarr_format: int) -> None: + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z = zarr.create( + shape=a.shape, + chunks=(10, 10), + dtype=a.dtype, + store=store, + zarr_format=zarr_format, + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + b = np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) + e = np.append(a, b, axis=1) + z.append(b, axis=1) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(e, z[:]) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [3]) +def test_append_bad_shape(store: IcechunkStore, zarr_format: int) -> None: + a = np.arange(100) + z = zarr.create( + shape=a.shape, chunks=10, dtype=a.dtype, store=store, 
zarr_format=zarr_format + ) + z[:] = a + b = a.reshape(10, 10) + with pytest.raises(ValueError): + z.append(b) + + +@pytest.mark.parametrize("order", ["C", "F", None]) +@pytest.mark.parametrize("zarr_format", [3]) +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_array_create_order( + order: MemoryOrder | None, zarr_format: int, store: IcechunkStore +) -> None: + arr = Array.create( + store=store, shape=(2, 2), order=order, zarr_format=zarr_format, dtype="i4" + ) + expected = order or zarr.config.get("array.order") + assert arr.order == expected + + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError + + +@pytest.mark.parametrize( + ("fill_value", "expected"), + [ + (np.nan * 1j, ["NaN", "NaN"]), + (np.nan, ["NaN", 0.0]), + (np.inf, ["Infinity", 0.0]), + (np.inf * 1j, ["NaN", "Infinity"]), + (-np.inf, ["-Infinity", 0.0]), + (math.inf, ["Infinity", 0.0]), + ], +) +@pytest.mark.parametrize("store", ["memory"], indirect=True) +async def test_special_complex_fill_values_roundtrip( + store: IcechunkStore, fill_value: Any, expected: list[Any] +) -> None: + Array.create(store=store, shape=(1,), dtype=np.complex64, fill_value=fill_value) + content = await store.get("zarr.json", prototype=default_buffer_prototype()) + assert content is not None + actual = json.loads(content.to_bytes()) + pytest.xfail("IcechunkStore does not support complex fill types") + assert actual["fill_value"] == expected diff --git a/icechunk-python/tests/test_zarr/test_group.py b/icechunk-python/tests/test_zarr/test_group.py index 6ea932a3..054c5dce 100644 --- a/icechunk-python/tests/test_zarr/test_group.py +++ b/icechunk-python/tests/test_zarr/test_group.py @@ -1,5 +1,6 @@ from __future__ import annotations +import operator from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np @@ -9,6 +10,7 @@ import zarr.api.asynchronous from icechunk import IcechunkStore from zarr import Array, AsyncArray, AsyncGroup, Group +from zarr.core._info import GroupInfo from zarr.core.buffer import default_buffer_prototype from zarr.core.common import JSON, ZarrFormat from zarr.core.group import GroupMetadata @@ -286,6 +288,10 @@ def test_group_open( store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok ) else: + if not store.supports_deletes: + pytest.skip( + "Store does not support deletes but `exists_ok` is True, requiring deletes to override a group" + ) group_created_again = Group.from_store( store, attributes=new_attrs, zarr_format=zarr_format, exists_ok=exists_ok ) @@ -373,8 +379,25 @@ def test_group_setitem(store: IcechunkStore, zarr_format: ZarrFormat) -> None: Test the `Group.__setitem__` method. 
""" group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - group["key"] = 10 + arr = np.ones((2, 4)) + group["key"] = arr + assert list(group.array_keys()) == ["key"] + assert group["key"].shape == (2, 4) + np.testing.assert_array_equal(group["key"][:], arr) + + if store.supports_deletes: + key = "key" + else: + # overwriting with another array requires deletes + # for stores that don't support this, we just use a new key + key = "key2" + + # overwrite with another array + arr = np.zeros((3, 5)) + group[key] = arr + assert key in list(group.array_keys()) + assert group[key].shape == (3, 5) + np.testing.assert_array_equal(group[key], arr) def test_group_contains(store: IcechunkStore, zarr_format: ZarrFormat) -> None: @@ -581,15 +604,6 @@ async def test_asyncgroup_attrs(store: IcechunkStore, zarr_format: ZarrFormat) - assert agroup.attrs == agroup.metadata.attributes == attributes -async def test_asyncgroup_info(store: IcechunkStore, zarr_format: ZarrFormat) -> None: - agroup = await AsyncGroup.from_store( # noqa - store, - zarr_format=zarr_format, - ) - pytest.xfail("Info is not implemented for metadata yet") - # assert agroup.info == agroup.metadata.info - - async def test_asyncgroup_open( store: IcechunkStore, zarr_format: ZarrFormat, @@ -793,7 +807,7 @@ async def test_group_members_async(store: IcechunkStore) -> None: g2 = await g1.create_group("g2") # immediate children - children = sorted([x async for x in group.members()], key=lambda x: x[0]) + children = sorted([x async for x in group.members()], key=operator.itemgetter(0)) assert children == [ ("a0", a0), ("g0", g0), @@ -803,7 +817,7 @@ async def test_group_members_async(store: IcechunkStore) -> None: assert nmembers == 2 # partial - children = sorted([x async for x in group.members(max_depth=1)], key=lambda x: x[0]) + children = sorted([x async for x in group.members(max_depth=1)], key=operator.itemgetter(0)) expected = [ ("a0", a0), ("g0", g0), @@ -816,7 +830,7 @@ async def test_group_members_async(store: IcechunkStore) -> None: # all children all_children = sorted( - [x async for x in group.members(max_depth=None)], key=lambda x: x[0] + [x async for x in group.members(max_depth=None)], key=operator.itemgetter(0) ) expected = [ ("a0", a0), @@ -846,7 +860,9 @@ async def test_require_group(store: IcechunkStore, zarr_format: ZarrFormat) -> N assert foo_group.attrs == {"foo": 100} # test that we can get the group using require_group and overwrite=True - foo_group = await root.require_group("foo", overwrite=True) + if store.supports_deletes: + foo_group = await root.require_group("foo", overwrite=True) + assert foo_group.attrs == {} _ = await foo_group.create_array( "bar", shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} @@ -935,3 +951,46 @@ def test_from_dict_extra_fields(self): result = GroupMetadata.from_dict(data) expected = GroupMetadata(attributes={"key": "value"}, zarr_format=2) assert result == expected + + +class TestInfo: + def test_info(self, store: IcechunkStore) -> None: + A = zarr.group(store=store, path="A") + B = A.create_group(name="B") + + B.create_array(name="x", shape=(1,)) + B.create_array(name="y", shape=(2,)) + + result = A.info + expected = GroupInfo( + _name="A", + _read_only=False, + _store_type="IcechunkStore", + _zarr_format=3, + ) + assert result == expected + + result = A.info_complete() + expected = GroupInfo( + _name="A", + _read_only=False, + _store_type="IcechunkStore", + _zarr_format=3, + _count_members=3, + _count_arrays=2, + 
_count_groups=1, + ) + assert result == expected + + +async def test_delitem_removes_children(store: IcechunkStore, zarr_format: ZarrFormat) -> None: + # https://github.com/zarr-developers/zarr-python/issues/2191 + g1 = zarr.group(store=store, zarr_format=zarr_format) + g1.create_group("0") + g1.create_group("0/0") + arr = g1.create_array("0/0/0", shape=(1,)) + arr[:] = 1 + + del g1["0"] + with pytest.raises(KeyError): + g1["0/0"] \ No newline at end of file diff --git a/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py b/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py index fcad0d19..ffc780af 100644 --- a/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py +++ b/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py @@ -1,12 +1,10 @@ from __future__ import annotations -from typing import Any, cast +from typing import Any import pytest from icechunk import IcechunkStore, StorageConfig -from zarr.abc.store import AccessMode from zarr.core.buffer import Buffer, cpu, default_buffer_prototype -from zarr.core.common import AccessModeLiteral from zarr.core.sync import collect_aiterator from zarr.testing.store import StoreTests @@ -23,10 +21,6 @@ class TestIcechunkStore(StoreTests[IcechunkStore, cpu.Buffer]): store_cls = IcechunkStore buffer_cls = cpu.Buffer - @pytest.mark.xfail(reason="not implemented", strict=False) - def test_store_eq(self, store: IcechunkStore, store_kwargs: dict[str, Any]) -> None: - pass - async def set(self, store: IcechunkStore, key: str, value: Buffer) -> None: await store._store.set(key, value.to_bytes()) @@ -46,47 +40,48 @@ async def get(self, store: IcechunkStore, key: str) -> Buffer: def store_kwargs(self, tmpdir) -> dict[str, Any]: kwargs = { "storage": StorageConfig.filesystem(f"{tmpdir}/store_test"), - "mode": "w", + "read_only": False, } return kwargs - @pytest.fixture(scope="function") + @pytest.fixture async def store(self, store_kwargs: dict[str, Any]) -> IcechunkStore: return IcechunkStore.open_or_create(**store_kwargs) + + def test_store_eq(self, store: IcechunkStore, store_kwargs: dict[str, Any]) -> None: + # check self equality + assert store == store + + # check store equality with same inputs + # asserting this is important for being able to compare (de)serialized stores + store2 = self.store_cls.open_existing(**store_kwargs) + assert store == store2 @pytest.mark.xfail(reason="Not implemented") def test_store_repr(self, store: IcechunkStore) -> None: super().test_store_repr(store) - def test_store_mode(self, store, store_kwargs: dict[str, Any]) -> None: - assert store.mode == AccessMode.from_literal("w") - assert not store.mode.readonly - - @pytest.mark.parametrize("mode", ["r", "r+", "a", "w", "w-"]) - def test_store_open_mode( - self, store_kwargs: dict[str, Any], mode: AccessModeLiteral + @pytest.mark.parametrize("read_only", [True, False]) + async def test_store_open_read_only( + self, store: IcechunkStore, store_kwargs: dict[str, Any], read_only: bool ) -> None: - store_kwargs["mode"] = mode - try: - store = self.store_cls.open_or_create(**store_kwargs) - assert store._is_open - assert store.mode == AccessMode.from_literal(mode) - except Exception: - assert 'r' in mode - - async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None: - create_kwargs = {**store_kwargs, "mode": "r"} - with pytest.raises(ValueError): - _store = self.store_cls.open_or_create(**create_kwargs) + store_kwargs["read_only"] = read_only + store = await self.store_cls.open(**store_kwargs) + 
assert store._is_open + assert store.read_only == read_only + + async def test_read_only_store_raises(self, store: IcechunkStore, store_kwargs: dict[str, Any]) -> None: + kwargs = {**store_kwargs, "read_only": True} + store = await self.store_cls.open(**kwargs) + assert store.read_only - # TODO # set - # with pytest.raises(ValueError): - # await store.set("foo", self.buffer_cls.from_bytes(b"bar")) + with pytest.raises(ValueError): + await store.set("foo", self.buffer_cls.from_bytes(b"bar")) - # # delete - # with pytest.raises(ValueError): - # await store.delete("foo") + # delete + with pytest.raises(ValueError): + await store.delete("foo") async def test_set_many(self, store: IcechunkStore) -> None: """ @@ -152,15 +147,6 @@ async def test_exists(self, store: IcechunkStore) -> None: ) assert await store.exists("foo/zarr.json") - async def test_empty(self, store: IcechunkStore) -> None: - assert await store.empty() - - await store.set( - "foo/zarr.json", - self.buffer_cls.from_bytes(DEFAULT_GROUP_METADATA), - ) - assert not await store.empty() - async def test_list(self, store: IcechunkStore) -> None: assert [k async for k in store.list()] == [] await store.set( @@ -264,42 +250,6 @@ async def test_get_many(self, store: IcechunkStore) -> None: expected_kvs = sorted(((k, b) for k, b in zip(keys, values, strict=False))) assert observed_kvs == expected_kvs - async def test_with_mode(self, store: IcechunkStore) -> None: - data = b"0000" - await self.set(store, "zarr.json", self.buffer_cls.from_bytes(ARRAY_METADATA)) - await self.set(store, "c/0/0/0", self.buffer_cls.from_bytes(data)) - assert (await self.get(store, "c/0/0/0")).to_bytes() == data - - for mode in ["r", "a"]: - mode = cast(AccessModeLiteral, mode) - clone = store.with_mode(mode) - # await store.close() - await clone._ensure_open() - assert clone.mode == AccessMode.from_literal(mode) - assert isinstance(clone, type(store)) - - # earlier writes are visible - result = await clone.get("c/0/0/0", default_buffer_prototype()) - assert result is not None - assert result.to_bytes() == data - - # writes to original after with_mode is visible - await self.set(store, "c/0/0/1", self.buffer_cls.from_bytes(data)) - result = await clone.get("c/0/0/1", default_buffer_prototype()) - assert result is not None - assert result.to_bytes() == data - - if mode == "a": - # writes to clone is visible in the original - await clone.set("c/0/1/0", self.buffer_cls.from_bytes(data)) - result = await clone.get("c/0/1/0", default_buffer_prototype()) - assert result is not None - assert result.to_bytes() == data - - else: - with pytest.raises(ValueError, match="store error: cannot write"): - await clone.set("c/0/1/0", self.buffer_cls.from_bytes(data)) - async def test_set_if_not_exists(self, store: IcechunkStore) -> None: key = "zarr.json" data_buf = self.buffer_cls.from_bytes(ARRAY_METADATA) @@ -317,3 +267,62 @@ async def test_set_if_not_exists(self, store: IcechunkStore) -> None: result = await store.get("c/0/0/0", default_buffer_prototype()) assert result == new + + async def test_is_empty(self, store: IcechunkStore) -> None: + assert await store.is_empty("") + await self.set( + store, "foo/bar/zarr.json", self.buffer_cls.from_bytes(DEFAULT_GROUP_METADATA) + ) + assert not await store.is_empty("") + assert await store.is_empty("fo") # codespell:ignore + assert not await store.is_empty("foo/") + assert not await store.is_empty("foo") + assert await store.is_empty("spam/") + + async def test_delete_dir(self, store: IcechunkStore) -> None: + if not 
store.supports_deletes: + pytest.skip("store does not support deletes") + await store.set("zarr.json", self.buffer_cls.from_bytes(DEFAULT_GROUP_METADATA)) + await store.set("foo-bar/zarr.json", self.buffer_cls.from_bytes(DEFAULT_GROUP_METADATA)) + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(ARRAY_METADATA)) + await store.set("foo/c/0", self.buffer_cls.from_bytes(b"chun")) + await store.delete_dir("foo") + assert await store.exists("zarr.json") + assert await store.exists("foo-bar/zarr.json") + assert not await store.exists("foo/zarr.json") + assert not await store.exists("foo/c/0") + + async def test_getsize(self, store: IcechunkStore) -> None: + key = "k/zarr.json" + data = self.buffer_cls.from_bytes(DEFAULT_GROUP_METADATA) + await self.set(store, key, data) + + result = await store.getsize(key) + assert isinstance(result, int) + assert result == len(DEFAULT_GROUP_METADATA) + + async def test_getsize_prefix(self, store: IcechunkStore) -> None: + prefix = "array" + await store.set(f"{prefix}/zarr.json", self.buffer_cls.from_bytes(ARRAY_METADATA)) + + keys = [ + f"{prefix}/c/0/0/0", + f"{prefix}/c/0/0/1", + f"{prefix}/c/0/1/0", + f"{prefix}/c/0/1/1", + f"{prefix}/c/1/0/0", + f"{prefix}/c/1/0/1", + f"{prefix}/c/1/1/0", + f"{prefix}/c/1/1/1", + ] + values = [bytes(i) for i, _ in enumerate(keys)] + for k, v in zip(keys, values, strict=False): + await self.set(store, k, self.buffer_cls.from_bytes(v)) + + result = await store.getsize_prefix(prefix) + assert isinstance(result, int) + assert result == sum(len(v) for v in values) + len(ARRAY_METADATA) + + async def test_getsize_raises(self, store: IcechunkStore) -> None: + with pytest.raises(ValueError): + await store.getsize("not-a-real-key") \ No newline at end of file diff --git a/icechunk/Cargo.toml b/icechunk/Cargo.toml index 57e285d0..91e87f7a 100644 --- a/icechunk/Cargo.toml +++ b/icechunk/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icechunk" -version = "0.1.0-alpha.4" +version = "0.1.0-alpha.5" description = "Transactional storage engine for Zarr designed for use on cloud object storage" readme = "../README.md" repository = "https://github.com/earth-mover/icechunk" diff --git a/icechunk/src/repository.rs b/icechunk/src/repository.rs index 43dd7a58..71767a91 100644 --- a/icechunk/src/repository.rs +++ b/icechunk/src/repository.rs @@ -426,7 +426,7 @@ ) -> RepositoryResult<()> { self.get_array(&path) .await - .map(|node| self.change_set.set_chunk_ref(node.id, coord, data)) + .map(|node: NodeSnapshot| self.change_set.set_chunk_ref(node.id, coord, data)) } pub async fn get_node(&self, path: &Path) -> RepositoryResult<NodeSnapshot> { diff --git a/icechunk/src/zarr.rs b/icechunk/src/zarr.rs index 11368c38..1e8d1ca0 100644 --- a/icechunk/src/zarr.rs +++ b/icechunk/src/zarr.rs @@ -527,9 +527,9 @@ impl Store { Ok(self.repository.read().await.change_set_bytes()?)
} - pub async fn empty(&self) -> StoreResult<bool> { - let res = self.repository.read().await.list_nodes().await?.next().is_none(); - Ok(res) + pub async fn is_empty(&self, prefix: &str) -> StoreResult<bool> { + let res = self.list_dir(prefix).await?.next().await; + Ok(res.is_none()) } pub async fn clear(&mut self) -> StoreResult<()> { @@ -720,9 +720,14 @@ // to avoid race conditions with other writers // (remember this method takes &self and not &mut self) let mut guard = self.repository.write().await; - let node = guard.get_node(&node_path).await.map_err(|_| { - KeyNotFoundError::NodeNotFound { path: node_path.clone() } - })?; + let node = guard.get_node(&node_path).await; + + // When there is no node at the given key, we don't consider it an error, instead we just do nothing + if let Err(RepositoryError::NodeNotFound { path: _, message: _ }) = node { + return Ok(()); + }; + + let node = node.map_err(StoreError::RepositoryError)?; match node.node_data { NodeData::Array(_, _) => { Ok(guard.deref_mut().delete_array(node_path).await?) @@ -735,7 +740,14 @@ Key::Chunk { node_path, coords } => { let mut guard = self.repository.write().await; let repository = guard.deref_mut(); - Ok(repository.set_chunk_ref(node_path, coords, None).await?) + match repository.set_chunk_ref(node_path, coords, None).await { + Ok(_) => Ok(()), + Err(RepositoryError::NodeNotFound { path: _, message: _ }) => { + // When there is no chunk at the given key, we don't consider it an error, instead we just do nothing + Ok(()) + } + Err(err) => Err(StoreError::RepositoryError(err)), + } } Key::ZarrV2(_) => Ok(()), } @@ -1032,6 +1044,10 @@ async fn exists(key: &str, repo: &Repository) -> StoreResult<bool> { match get_key(key, &ByteRange::ALL, repo).await { Ok(_) => Ok(true), Err(StoreError::NotFound(_)) => Ok(false), + Err(StoreError::RepositoryError(RepositoryError::NodeNotFound { + path: _, + message: _, + })) => Ok(false), Err(other_error) => Err(other_error), } } @@ -1780,6 +1796,9 @@ mod tests { Err(StoreError::NotFound(KeyNotFoundError::NodeNotFound { path })) if path.to_string() == "/array", )); + // Deleting a non-existent key should not fail + store.delete("array/zarr.json").await.unwrap(); + store.set("array/zarr.json", zarr_meta.clone()).await.unwrap(); store.delete("array/zarr.json").await.unwrap(); assert!(matches!( @@ -1948,7 +1967,7 @@ None, ); - assert!(store.empty().await.unwrap()); + assert!(store.is_empty("").await.unwrap()); assert!(!store.exists("zarr.json").await.unwrap()); assert_eq!(all_keys(&store).await.unwrap(), Vec::<String>::new()); @@ -1960,7 +1979,7 @@ ) .await?; - assert!(!store.empty().await.unwrap()); + assert!(!store.is_empty("").await.unwrap()); assert!(store.exists("zarr.json").await.unwrap()); assert_eq!(all_keys(&store).await.unwrap(), vec!["zarr.json".to_string()]); store @@ -1981,7 +2000,7 @@ let zarr_meta = Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"array","attributes":{"foo":42},"shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"#); store.set("group/array/zarr.json", zarr_meta).await?; - assert!(!store.empty().await.unwrap()); + assert!(!store.is_empty("").await.unwrap()); assert!(store.exists("zarr.json").await.unwrap());
assert!(store.exists("group/array/zarr.json").await.unwrap()); assert!(store.exists("group/zarr.json").await.unwrap());