Skip to content

Commit c3b7c32

Browse files
Fix pyarrow dataset import in _api (#119)
* fix pyarrow dataset import
* move batch size param to to_batches

---------

Co-authored-by: Liam Morrison <[email protected]>
1 parent 3b74a95 commit c3b7c32

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

stac_geoparquet/arrow/_api.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import psutil
1313
import pyarrow as pa
1414
import pystac
15+
from pyarrow.dataset import dataset
1516

1617
from stac_geoparquet.arrow._batch import StacArrowBatch, StacJsonBatch
1718
from stac_geoparquet.arrow._constants import (
@@ -136,11 +137,9 @@ def parse_stac_items_to_arrow(
136137
output_path=fname,
137138
)
138139
memlog(f"Batch {cnt}")
139-
ds = pa.dataset.dataset(
140-
tmpdir, schema=schema, format="parquet", batch_size=chunk_size
141-
)
140+
ds = dataset(tmpdir, schema=schema, format="parquet")
142141
memlog("Created Dataset")
143-
batches = ds.to_batches()
142+
batches = ds.to_batches(batch_size=chunk_size)
144143
memlog("Created Batches")
145144
return pa.RecordBatchReader.from_batches(schema, batches)
146145

0 commit comments

Comments
 (0)