Skip to content

Commit b7fd358

Browse files
committed
Make work again for arrow
1 parent b7da89f commit b7fd358

File tree

3 files changed

+15
-11
lines changed

3 files changed

+15
-11
lines changed

fsspec/caching.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ def __init__(
628628
fetcher: Fetcher,
629629
size: int,
630630
data: dict[tuple[int, int], bytes] | None = None,
631-
strict: bool = True,
631+
strict: bool = False,
632632
**_: Any,
633633
):
634634
super().__init__(blocksize, fetcher, size)
@@ -659,20 +659,30 @@ def _fetch(self, start: int | None, stop: int | None) -> bytes:
659659
stop = self.size
660660

661661
out = b""
662+
started = False
663+
loc_old = 0
662664
for loc0, loc1 in sorted(self.data):
663665
if (loc0 <= start < loc1) and (loc0 <= stop <= loc1):
664666
# entirely within the block
665667
off = start - loc0
666668
return self.data[(loc0, loc1)][off : off + stop - start]
669+
if started and loc0 > loc_old:
670+
# a gap where we need data
671+
if self.strict:
672+
raise ValueError
673+
out += b"\x00" * (loc0 - loc_old)
667674
if loc0 <= start < loc1:
668675
# found the start
669676
off = start - loc0
670677
out = self.data[(loc0, loc1)][off : off + stop - start]
671-
if start < loc0 and stop > loc1:
678+
started = True
679+
elif start < loc0 and stop > loc1:
672680
# the whole block
673681
out += self.data[(loc0, loc1)]
674-
if loc0 <= stop <= loc1:
682+
elif loc0 <= stop <= loc1:
683+
# end block
675684
return out + self.data[(loc0, loc1)][: stop - loc0]
685+
loc_old = loc1
676686
raise ValueError
677687

678688

fsspec/parquet.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ class AlreadyBufferedFile(AbstractBufferedFile):
1919
def _fetch_range(self, start, end):
2020
raise NotImplementedError
2121

22-
def close(self):
23-
pass
24-
2522

2623
def open_parquet_file(
2724
path,

fsspec/tests/test_parquet.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
@pytest.fixture(
2323
params=[
24-
pytest.param("fastparquet", marks=FASTPARQUET_MARK),
24+
# pytest.param("fastparquet", marks=FASTPARQUET_MARK),
2525
pytest.param("pyarrow", marks=PYARROW_MARK),
2626
]
2727
)
@@ -40,8 +40,6 @@ def test_open_parquet_file(
4040
):
4141
# Pandas required for this test
4242
pd = pytest.importorskip("pandas")
43-
if engine != "fastparquet":
44-
return
4543
if columns == ["z"] and engine == "fastparquet":
4644
columns = ["z.a"] # fastparquet is more specific
4745

@@ -135,10 +133,9 @@ def test_open_parquet_file(
135133
result = pd.read_parquet(f, columns=columns, engine=engine)
136134
pd.testing.assert_frame_equal(expect, result)
137135
elif engine == "pyarrow":
138-
# Should raise ValueError for "pyarrow"
139136
import pyarrow
140137

141-
with pytest.raises((ValueError, pyarrow.ArrowError)):
138+
with pytest.raises((ValueError, pyarrow.ArrowException)):
142139
open_parquet_file(
143140
path,
144141
metadata=["Not-None"],

0 commit comments

Comments
 (0)