Skip to content

Commit dae0e80

Browse files
Allow user override of seekable parameter in ArrowFSWrapper methods (#1950)
1 parent dbc633e commit dae0e80

File tree

2 files changed

+90
-2
lines changed

2 files changed

+90
-2
lines changed

fsspec/implementations/arrow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,11 +205,11 @@ def modified(self, path):
205205
return self.fs.get_file_info(path).mtime
206206

207207
def cat_file(self, path, start=None, end=None, **kwargs):
    """Return the contents of ``path``, optionally restricted to a byte range.

    Parameters
    ----------
    path:
        Location of the file to read.
    start, end:
        Optional byte offsets delimiting the range to read. They are
        forwarded unchanged to the parent implementation.
    **kwargs:
        Passed through to the parent ``cat_file``. A caller-supplied
        ``seekable`` value always wins over the default chosen here.
    """
    # A whole-file read (or one starting at offset 0) can use a plain
    # sequential stream; any other offset needs a seekable handle.
    # NOTE(review): a negative ``end`` presumably has to be resolved against
    # the file size by the parent, which also needs a seekable handle - confirm.
    needs_seek = start not in (None, 0) or (end is not None and end < 0)
    kwargs.setdefault("seekable", needs_seek)
    # Forward the requested bounds: passing ``None`` for both would make the
    # parent return the entire file regardless of what the caller asked for.
    return super().cat_file(path, start=start, end=end, **kwargs)
210210

211211
def get_file(self, rpath, lpath, **kwargs):
    """Delegate to the parent ``get_file``, defaulting ``seekable`` to False.

    An explicit ``seekable`` keyword supplied by the caller is left
    untouched; all other keyword arguments are passed through as given.
    """
    if "seekable" not in kwargs:
        kwargs["seekable"] = False
    super().get_file(rpath, lpath, **kwargs)
214214

215215

fsspec/implementations/tests/test_arrow.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,3 +268,91 @@ def test_get_kwargs_from_urls_hadoop_fs():
268268
assert kwargs["host"] == "localhost"
269269
assert kwargs["port"] == 8020
270270
assert "replication" not in kwargs
271+
272+
273+
def test_get_file_seekable_default(fs, remote_dir, tmp_path):
    """get_file copies the data with default, True and False ``seekable``."""
    payload = b"test data for seekable"
    source = remote_dir + "/test_file.txt"

    # Write the remote fixture file once; every case below downloads it.
    with fs.open(source, "wb") as handle:
        handle.write(payload)

    # (local file name, extra keyword arguments for get_file)
    cases = (
        ("test_default.txt", {}),
        ("test_seekable.txt", {"seekable": True}),
        ("test_not_seekable.txt", {"seekable": False}),
    )
    for filename, options in cases:
        target = tmp_path / filename
        fs.get_file(source, str(target), **options)
        assert target.read_bytes() == payload
300+
301+
302+
def test_cat_file_seekable_override(fs, remote_dir):
    """cat_file returns the full data whether or not ``seekable`` is given."""
    payload = b"test data for cat_file seekable"
    source = remote_dir + "/test_cat.txt"

    # Write the remote fixture file that every call below reads back.
    with fs.open(source, "wb") as handle:
        handle.write(payload)

    # No keyword (default), then an explicit True and an explicit False.
    for options in ({}, {"seekable": True}, {"seekable": False}):
        assert fs.cat_file(source, **options) == payload
321+
322+
323+
def test_seekable_true_allows_size_method(fs, remote_dir):
    """A file opened with seekable=True reports its size and is readable."""
    payload = b"test data for size method" * 10
    expected_size = len(payload)
    target = remote_dir + "/test_size.txt"

    # Write the remote fixture file.
    with fs.open(target, "wb") as writer:
        writer.write(payload)

    with fs.open(target, "rb", seekable=True) as reader:
        assert reader.seekable() is True
        # size() must succeed and match the number of bytes written
        assert reader.size() == expected_size
        # the content itself is still readable afterwards
        assert reader.read() == payload
340+
341+
342+
def test_seekable_false_prevents_size_method(fs, remote_dir):
    """A file opened with seekable=False rejects size() but can be read."""
    payload = b"test data for size method" * 10
    target = remote_dir + "/test_size.txt"

    # Write the remote fixture file.
    with fs.open(target, "wb") as writer:
        writer.write(payload)

    with fs.open(target, "rb", seekable=False) as reader:
        assert reader.seekable() is False
        # size() is expected to fail on a non-seekable handle
        with pytest.raises(OSError, match="only valid on seekable files"):
            reader.size()
        # sequential reading still works after the failed size() call
        assert reader.read() == payload

0 commit comments

Comments
 (0)