-
Notifications
You must be signed in to change notification settings - Fork 35
Open
Description
Consider the following change
--- a/datafusion_iceberg/examples/insert_table.rs
+++ b/datafusion_iceberg/examples/insert_table.rs
@@ -16,7 +16,7 @@ use std::sync::Arc;
#[tokio::main]
pub(crate) async fn main() {
- let object_store = ObjectStoreBuilder::memory();
+ let object_store = ObjectStoreBuilder::filesystem("/Users/gruuya/warehouse");
let catalog: Arc<dyn Catalog> = Arc::new(
SqlCatalog::new("sqlite://", "test", object_store)
Whilst this works
warehouse % tree test
test
└── orders
├── data
│ ├── day=18262
│ │ └── ef5cd64c-7362-11f0-8000-b000b2e9ea86.parquet
│ ├── day=18264
│ │ └── ef60de2c-7362-11f0-b39d-e746482efe9c.parquet
│ └── day=18294
│ └── ef5cd656-7362-11f0-b39c-e0cc5b60165e.parquet
└── metadata
├── 00000-b7b6b0c3-9c44-4e24-978d-34fb900e3246.metadata.json
├── 00001-3556f1e8-2425-4737-94a0-a86847927bdc.metadata.json
├── 00002-6686c10a-9ffd-4b64-98f7-3f670e9dd9e5.metadata.json
├── 27e64015-4164-4973-8700-95a822b858b2-m0.avro
├── 87eda61c-9f0a-4d94-8b7d-3086c4987914-m0.avro
├── snap-1477384046236171598-0-87eda61c-9f0a-4d94-8b7d-3086c4987914.avro
├── snap-4643981801262411697-0-27e64015-4164-4973-8700-95a822b858b2.avro
└── version-hint.text
7 directories, 11 files
it breaks external readers, for instance in DuckDB
D select * from iceberg_scan('/Users/gruuya/warehouse/test/orders/metadata/00002-6686c10a-9ffd-4b64-98f7-3f670e9dd9e5.metadata.json');
IO Error:
Cannot open file "/test/orders/metadata/snap-1477384046236171598-0-87eda61c-9f0a-4d94-8b7d-3086c4987914.avro": No such file or directory
This is because the JSON and Avro files end up containing relative paths instead of absolute ones, which is not (yet) allowed by the spec
warehouse % jq . test/orders/metadata/00002-6686c10a-9ffd-4b64-98f7-3f670e9dd9e5.metadata.json | grep avro
"manifest-list": "/test/orders/metadata/snap-1477384046236171598-0-87eda61c-9f0a-4d94-8b7d-3086c4987914.avro",
"manifest-list": "/test/orders/metadata/snap-4643981801262411697-0-27e64015-4164-4973-8700-95a822b858b2.avro",
warehouse % fastavro test/orders/metadata/snap-1477384046236171598-0-87eda61c-9f0a-4d94-8b7d-3086c4987914.avro
{"manifest_path": "/test/orders/metadata/87eda61c-9f0a-4d94-8b7d-3086c4987914-m0.avro", "manifest_length": 7615,
...
warehouse % fastavro test/orders/metadata/87eda61c-9f0a-4d94-8b7d-3086c4987914-m0.avro
... "data_file": {"content": 0, "file_path": "/test/orders/data/day=18262/ef5cd64c-7362-11f0-8000-b000b2e9ea86.parquet", "file_format": "PARQUET",
...
This is probably because all other variants of ObjectStoreBuilder wrap non-prefixed object stores and accept an absolute URL in the table builder's with_location, whereas the Filesystem variant wraps a prefixed object store and therefore works with paths relative to that prefix.
Metadata
Metadata
Assignees
Labels
No labels